Semantic AI for Future Industries: Bridging Explainability and
Integration in Black Box Models
(Code
and results of experiments related to the article submitted to ISM-2025: “International Conference on Industry of the Future and Smart Manufacturing”)
ABSTRACT
Artificial intelligence is
increasingly used in industrial systems, yet the widespread adoption of black
box models such as deep neural networks (NNs) presents challenges in
transparency and interoperability. This paper introduces a novel Neuro-Symbolic
eXplanation (NSX) pipeline that transforms black box analytics into explainable
and integration-enabling semantic representations using SWRL rules and reasoning.
Our approach consists of several key steps: generating synthetic data; training
decision trees to approximate the behavior of NNs; converting decision trees
into SWRL rules, enabling automated and explainable ontology-based reasoning.
This transformation enhances both explainability, by making model logic
explicit, and integration, by providing a semantic framework for cross-system
interoperability. To further enhance usability, we integrate ChatGPT as an
external automated service via API for multiple tasks: mapping internal feature
representations to human-readable ontology terms; generating natural language
explanations for inferred rules; explaining classification outcomes based on
reasoner-derived results; and translating SWRL rules into SPARQL queries for
alternative reasoning. This hybrid approach is particularly valuable in
industrial contexts such as predictive maintenance, quality control, and
autonomous decision-making, where transparency and system integration are
crucial. We experimentally demonstrate NSX-pipeline’s effectiveness and discuss
its implications for future industries.
CODE AND EXPERIMENTS
Experiments
with IRIS (most recent, verified, and included in the article)
# EXPERIMENTS
WITH DECISION TREE DEPTH=3
import numpy as np
import pandas as pd
from sklearn.datasets
import load_iris
from sklearn.model_selection
import train_test_split
from sklearn.preprocessing
import StandardScaler
from sklearn.neural_network
import MLPClassifier
from sklearn.tree
import DecisionTreeClassifier,
export_text
import re
def load_and_process_data():
    """Load the Iris dataset and prepare a standardized train/test split.

    Returns a 10-tuple:
        X_train, X_test   -- standardized feature matrices (scaler fitted on the training part only)
        y_train, y_test   -- the matching label vectors
        feature_mapping   -- {"f<i>": ("hasF<i>", original feature name)}
        class_mapping     -- {"class: <i>": ("Class_<i>", original class name)}
        n_features        -- number of feature columns
        scaler            -- the fitted StandardScaler
        X, y              -- the full, unscaled dataset and its labels
    """
    dataset = load_iris()
    features = dataset.data
    labels = dataset.target

    # Decision-tree alias -> (SWRL property name, name in the original dataset)
    feature_mapping = {
        f"f{idx}": (f"hasF{idx}", original_name)
        for idx, original_name in enumerate(dataset.feature_names)
    }

    # Decision-tree leaf label -> (SWRL class name, name in the original dataset)
    class_mapping = {
        f"class: {idx}": (f"Class_{idx}", original_name)
        for idx, original_name in enumerate(dataset.target_names)
    }

    # Hold out 20% of the samples for testing
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Standardize using statistics of the training part only
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features)
    test_features = scaler.transform(test_features)

    return (
        train_features,
        test_features,
        train_labels,
        test_labels,
        feature_mapping,
        class_mapping,
        features.shape[1],
        scaler,
        features,
        labels,
    )
def train_models(X_train, X_test, y_train, y_test, X_shape, scaler, X_original, y_original,
                 num_synthetic_samples=None, max_depth=7, random_state=None):
    """Train the neural network and a surrogate decision tree that imitates it.

    The network is fitted on the (scaled) training data. Synthetic samples are
    then drawn uniformly inside the per-feature min/max box of the training
    data, labelled by the network, and used to fit the decision tree.

    Args:
        X_train, X_test: scaled feature matrices (2-D NumPy arrays).
        y_train, y_test: label vectors for the two splits.
        X_shape: number of feature columns; must equal ``X_train.shape[1]``.
        scaler: fitted scaler exposing ``transform`` / ``inverse_transform``.
        X_original, y_original: full unscaled dataset, used only to score the tree.
        num_synthetic_samples: number of synthetic samples to collect *per class*.
            When falsy, ``len(X_train) // n_classes`` is used.
        max_depth: maximum depth of the surrogate decision tree.
            NOTE(review): the surrounding write-up is headed "DEPTH=3" while the
            value used here has always been 7 -- confirm which one is intended.
        random_state: optional seed for the synthetic sampling. ``None`` keeps the
            previous behaviour of drawing from NumPy's global random state.

    Returns:
        ``(clf, mlp, X_synthetic, scaler)`` -- the fitted tree, the fitted network,
        the synthetic samples in original (unscaled) units, and the scaler.

    Raises:
        ValueError: if ``X_shape`` does not match the width of ``X_train``.
        RuntimeError: if the network never predicts one of its classes inside the
            sampling box (the sampling loop could otherwise never finish).
    """
    mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=2000, random_state=42, alpha=0.01)
    mlp.fit(X_train, y_train)
    print(f'NN Test Accuracy: {mlp.score(X_test, y_test):.4f}')

    # Both objects expose .uniform(); the global module is kept as the default
    # so callers that seed np.random themselves see no change.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # ---------- Refined Stage 2: Generate Synthetic Samples ----------
    def generate_synthetic_samples(model, X_shape, scaler, num_samples_per_class):
        """Draw class-balanced synthetic samples labelled by the trained network."""
        class_labels = model.classes_
        n_classes = len(class_labels)
        target_count = num_samples_per_class if num_samples_per_class else len(X_train) // n_classes

        # Min/max of every feature of the training data, in original units.
        # np.array(...) copies, so a scaler configured with copy=False cannot
        # overwrite X_train.
        train_original = scaler.inverse_transform(np.array(X_train))
        if train_original.shape[1] != X_shape:
            raise ValueError(
                f"X_shape={X_shape} does not match the {train_original.shape[1]} "
                "feature columns of X_train"
            )
        feature_ranges = list(zip(train_original.min(axis=0), train_original.max(axis=0)))

        synthetic_samples = []
        synthetic_labels = []
        class_counts = [0] * n_classes  # accepted samples per class
        attempts = 0
        # If a class is still unseen after this many draws, assume the network
        # never predicts it inside the sampling box and stop instead of spinning.
        max_attempts_for_unseen_class = 100_000

        while min(class_counts) < target_count:
            if attempts >= max_attempts_for_unseen_class and min(class_counts) == 0:
                missing = [class_labels[k] for k, count in enumerate(class_counts) if count == 0]
                raise RuntimeError(
                    f"No synthetic sample was labelled as class(es) {missing} after "
                    f"{attempts} draws; cannot build a balanced synthetic set"
                )
            attempts += 1

            # Draw one sample inside the feature ranges (original units)
            sample = np.array([rng.uniform(low, high) for low, high in feature_ranges])
            # The network was trained on scaled inputs
            sample_scaled = scaler.transform(sample.reshape(1, -1))
            # Label = class with the highest predicted probability
            class_index = int(np.argmax(model.predict_proba(sample_scaled)))

            # Rejection step: keep the sample only while its class is below target
            if class_counts[class_index] < target_count:
                synthetic_samples.append(sample)
                synthetic_labels.append(class_labels[class_index])
                class_counts[class_index] += 1

        return np.array(synthetic_samples), np.array(synthetic_labels)

    # Generate synthetic data
    X_synthetic, y_synthetic = generate_synthetic_samples(mlp, X_shape, scaler, num_synthetic_samples)

    # Print the distribution of labels in the synthetic data
    print(f"Synthetic data class distribution: {np.unique(y_synthetic, return_counts=True)}")

    # Print the predictions of the NN on the training data
    y_train_pred = mlp.predict(X_train)
    print(f"NN training data class distribution: {np.unique(y_train_pred, return_counts=True)}")
    # ---------- End of Refined Stage 2 ----------

    # The tree is trained in the same scaled space the network works in
    X_synthetic_scaled = scaler.transform(X_synthetic)
    clf = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    clf.fit(X_synthetic_scaled, y_synthetic)

    # Evaluate the decision tree against the true labels of the original data
    X_original_scaled = scaler.transform(X_original)
    dt_accuracy = clf.score(X_original_scaled, y_original)
    print(f"Decision Tree Accuracy on Original Iris Data: {dt_accuracy:.4f}")

    return clf, mlp, X_synthetic, scaler
def generate_swrl_rules(clf, feature_names, class_mapping, scaler):
    """Turn every root-to-leaf path of a fitted decision tree into one SWRL rule.

    Args:
        clf: fitted tree classifier; only ``clf.tree_`` (``children_left``,
            ``children_right``, ``feature``, ``threshold``, ``value``) is read.
        feature_names: one name per feature column; name ``n`` yields the SWRL
            property ``has<N>`` (capitalized, spaces removed).
        class_mapping: ``{"class: <i>": (swrl_class_name, ...)}``.
        scaler: object with ``inverse_transform``; used to express the split
            thresholds in the original (unscaled) units.

    Returns:
        List of rule strings of the form
        ``Unclassified(?p) ^ <sorted conditions> -> <Class>(?p)``, in
        left-to-right leaf order. Thresholds are rendered with two decimals.
    """
    tree = clf.tree_
    n_features = len(feature_names)
    rules = []

    def to_original_scale(feature_index, threshold):
        # The scaler works on full rows, so place the threshold in an
        # otherwise-zero row and read back the same column.
        probe = np.zeros((1, n_features))
        probe[0][feature_index] = threshold
        return scaler.inverse_transform(probe)[0][feature_index]

    def recurse(node, conditions, bound_variables):
        left_child = tree.children_left[node]
        right_child = tree.children_right[node]

        # A node without children is a leaf: emit the rule for this path
        if left_child == -1 and right_child == -1:
            predicted_class = np.argmax(tree.value[node])
            class_name = class_mapping[f"class: {predicted_class}"][0]
            # Joining the atoms as one list avoids a dangling " ^ " when the
            # tree is a single leaf and the path has no conditions.
            atoms = ["Unclassified(?p)"] + sorted(conditions)
            rules.append(" ^ ".join(atoms) + f' -> {class_name}(?p)')
            return

        feature_index = tree.feature[node]
        feature = feature_names[feature_index]
        var = f'?x{feature_index + 1}'

        # Bind the variable to its property only the first time the feature
        # appears on this path
        if var in bound_variables:
            path_conditions = conditions
            path_variables = bound_variables
        else:
            property_atom = f'has{feature.capitalize().replace(" ", "")}(?p, {var})'
            path_conditions = conditions + [property_atom]
            path_variables = bound_variables | {var}

        threshold_original_scale = to_original_scale(feature_index, tree.threshold[node])
        left_condition = f'swrlb:lessThanOrEqual({var}, {threshold_original_scale:.2f})'
        right_condition = f'swrlb:greaterThan({var}, {threshold_original_scale:.2f})'

        # New lists/sets are built at every level, so siblings never share state
        recurse(left_child, path_conditions + [left_condition], path_variables)
        recurse(right_child, path_conditions + [right_condition], path_variables)

    recurse(0, [], set())
    return rules
# Numeric comparison atom as produced by the rule generator,
# e.g. "swrlb:lessThanOrEqual(?x1, 6.14)".
_SWRL_COMPARISON_RE = re.compile(
    r'swrlb:(?P<op>lessThanOrEqual|greaterThanOrEqual|lessThan|greaterThan)'
    r'\((?P<variable>\?x\d+), (?P<value>[+-]?\d+\.?\d*)\)'
)


def optimize_swrl_rule(rule):
    """Remove redundant numeric bounds from the body of a single SWRL rule.

    For every variable and every comparison builtin only the tightest
    condition is kept: the smallest value for ``lessThan`` /
    ``lessThanOrEqual`` and the largest for ``greaterThan`` /
    ``greaterThanOrEqual``. All other atoms are passed through unchanged, so
    an atom this function does not understand is never silently dropped.

    Args:
        rule: rule text ``<atom> ^ <atom> ^ ... -> <head>``.

    Returns:
        The rule rebuilt as ``Unclassified(?p)`` followed by the remaining
        atoms in sorted order. A string that does not contain exactly one
        ``->`` is returned unchanged.
    """
    parts = rule.split('->')
    if len(parts) != 2:
        return rule  # Not a "body -> head" rule; nothing to optimize.
    body = parts[0].strip()
    head = parts[1].strip()

    passthrough = []  # atoms kept as they are (property atoms, unknown builtins, ...)
    tightest = {}     # (variable, operator) -> (value, atom text)

    for raw_atom in body.split('^'):
        atom = raw_atom.strip()
        # "Unclassified(?p)" is re-added once at the front; empty pieces come
        # from a dangling "^".
        if not atom or atom == "Unclassified(?p)":
            continue

        match = _SWRL_COMPARISON_RE.match(atom) if atom.startswith('swrlb:') else None
        if match is None:
            passthrough.append(atom)
            continue

        op = match.group('op')
        key = (match.group('variable'), op)
        value = float(match.group('value'))
        current = tightest.get(key)
        if current is None:
            tightest[key] = (value, atom)
            continue

        # Upper bounds tighten downwards, lower bounds tighten upwards.
        # Strict comparison keeps the first atom on ties.
        is_tighter = value < current[0] if op.startswith('less') else value > current[0]
        if is_tighter:
            tightest[key] = (value, atom)

    kept_atoms = sorted(passthrough + [atom for _, atom in tightest.values()])
    return " ^ ".join(["Unclassified(?p)"] + kept_atoms) + " -> " + head
# Numeric comparison atom, e.g. "swrlb:greaterThan(?x2, 3.08)".
_BOUND_ATOM_RE = re.compile(
    r'swrlb:(?P<op>lessThanOrEqual|greaterThanOrEqual|lessThan|greaterThan)'
    r'\((?P<variable>\?x\d+), (?P<value>[+-]?\d+\.?\d*)\)'
)


def _parse_swrl_rule(rule):
    """Split a rule into (head, plain atoms, upper bounds, lower bounds); None without '->'."""
    pieces = rule.split("->")
    if len(pieces) < 2:
        return None
    head = pieces[1].strip()

    plain_atoms = set()
    upper = {}  # variable -> (value, rank); a smaller tuple is a tighter bound
    lower = {}  # variable -> (value, rank); a larger tuple is a tighter bound
    for raw_atom in pieces[0].strip().split(" ^ "):
        atom = raw_atom.strip()
        if not atom or atom == "Unclassified(?p)":
            continue
        match = _BOUND_ATOM_RE.fullmatch(atom)
        if match is None:
            plain_atoms.add(atom)
            continue

        var = match.group('variable')
        op = match.group('op')
        value = float(match.group('value'))
        if op.startswith('less'):
            # At equal values "< v" is tighter than "<= v", so it ranks lower.
            bound = (value, 0 if op == 'lessThan' else 1)
            upper[var] = min(upper[var], bound) if var in upper else bound
        else:
            # At equal values "> v" is tighter than ">= v", so it ranks higher.
            bound = (value, 1 if op == 'greaterThan' else 0)
            lower[var] = max(lower[var], bound) if var in lower else bound
    return head, plain_atoms, upper, lower


def _subsumes(general, specific):
    """Return True when every condition of `general` is implied by `specific`."""
    _, general_atoms, general_upper, general_lower = general
    _, specific_atoms, specific_upper, specific_lower = specific
    if not general_atoms <= specific_atoms:
        return False
    for var, bound in general_upper.items():
        if var not in specific_upper or specific_upper[var] > bound:
            return False
    for var, bound in general_lower.items():
        if var not in specific_lower or specific_lower[var] < bound:
            return False
    return True


def optimize_inter_swrl_rules(rules):
    """Drop rules that are made redundant by a more general rule with the same head.

    A rule is redundant when another rule concludes the same class and its body
    is implied by this rule's body: all of its non-numeric atoms appear
    literally in this rule, and each of its numeric bounds is at least as loose
    as this rule's bound on the same variable. All four comparison builtins
    (``lessThan``, ``lessThanOrEqual``, ``greaterThan``, ``greaterThanOrEqual``)
    are taken into account. When several rules are equivalent, only the first
    one is kept.

    Atoms are compared as text, so variables are assumed to be named
    consistently across rules.

    Args:
        rules: iterable of rule strings ``<body> -> <head>``.

    Returns:
        The surviving rules in their original order. Strings without ``->``
        are returned untouched and never used to eliminate other rules.
    """
    rules = list(rules)
    parsed = [_parse_swrl_rule(rule) for rule in rules]

    optimized_rules = []
    for i, (rule, current) in enumerate(zip(rules, parsed)):
        is_redundant = False
        if current is not None:
            for j, other in enumerate(parsed):
                if i == j or other is None or other[0] != current[0]:
                    continue
                if not _subsumes(other, current):
                    continue
                # `other` covers this rule. If the two are equivalent, only the
                # later one is dropped so that one representative survives.
                if j < i or not _subsumes(current, other):
                    is_redundant = True
                    break
        if not is_redundant:
            optimized_rules.append(rule)
    return optimized_rules
# Main execution: load the data, train both models, then derive and refine the SWRL rules
(X_train, X_test, y_train, y_test, feature_mapping,
 class_mapping, X_shape, scaler, X_original, y_original) = load_and_process_data()
clf, mlp, X_synthetic, scaler = train_models(
    X_train, X_test, y_train, y_test, X_shape, scaler,
    X_original, y_original, num_synthetic_samples=400,
)

# Short aliases (f0, f1, ...) used both in the tree dump and for rule generation
tree_feature_names = [f"f{i}" for i in range(X_synthetic.shape[1])]


def _print_rules(title, rules):
    """Print a section title followed by one rule per line."""
    print(title)
    for rule in rules:
        print(rule)


# Print decision tree rules
print('\nExtracted Decision Tree Rules:\n')
print(export_text(clf, feature_names=tree_feature_names))

# Print how tree aliases map to SWRL names and to the original dataset names
for key, (swrl_name, actual_name) in feature_mapping.items():
    print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of attribute in the original dataset);")
print('\n')
for key, (swrl_name, actual_name) in class_mapping.items():
    print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of class in the original dataset);")

# Generate and print SWRL rules
swrl_rules = generate_swrl_rules(clf, tree_feature_names, class_mapping, scaler)
_print_rules('\nGenerated SWRL Rules:\n', swrl_rules)

# Optimize SWRL rules: first within each rule, then across rules
optimized_rules = [optimize_swrl_rule(rule) for rule in swrl_rules]
_print_rules('\nOptimized SWRL Rules (Intra-Rule Redundancy Removed):\n', optimized_rules)

optimized_rules = optimize_inter_swrl_rules(optimized_rules)
_print_rules('\nOptimized SWRL Rules (Inter-Rule Redundancy Removed):\n', optimized_rules)
NN Test Accuracy: 1.0000
Synthetic data class distribution: (array([0, 1, 2]), array([400, 400, 400]))
NN training data class distribution: (array([0, 1, 2]), array([40, 39, 41]))
Decision Tree Accuracy on Original Iris
Data: 0.9000
Extracted Decision Tree
with normalized attributes:
|--- f3 <= 0.70
| |--- f0 <= 0.40
| |
|--- f1 <= 0.04
| |
| |--- class: 1
| |
|--- f1 >
0.04
| |
| |--- class: 0
| |--- f0 > 0.40
| |
|--- f1 <= 1.84
| |
| |--- class: 1
| |
|--- f1 >
1.84
| |
| |--- class: 1
|--- f3 > 0.70
| |--- f2 <= 0.15
| |
|--- f1 <= 0.27
| |
| |--- class: 2
| |
|--- f1 >
0.27
| |
| |--- class: 0
| |--- f2 > 0.15
| |
|--- f1 <= 1.65
| |
| |--- class: 2
| |
|--- f1 >
1.65
| |
| |--- class: 2
Name Mappings:
f0 (Decision tree); hasF0 (SWRL rules);
sepal length (cm) (name of attribute in the original dataset);
f1 (Decision tree); hasF1 (SWRL rules);
sepal width (cm) (name of attribute in the original dataset);
f2 (Decision tree); hasF2 (SWRL rules);
petal length (cm) (name of attribute in the original dataset);
f3 (Decision tree); hasF3 (SWRL rules);
petal width (cm) (name of attribute in the original dataset);
class: 0 (Decision tree); Class_0 (SWRL
rules); setosa (name of class in the original dataset);
class: 1 (Decision tree); Class_1 (SWRL
rules); versicolor (name of class in the original dataset);
class: 2 (Decision tree); Class_2 (SWRL
rules); virginica (name of class in the original dataset);
Generated
SWRL Rules:
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x1,
6.14) ^ swrlb:lessThanOrEqual(?x2, 3.08) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_1(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.08) ^ swrlb:lessThanOrEqual(?x1, 6.14) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_0(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x1,
6.14) ^ swrlb:lessThanOrEqual(?x2, 3.89) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_1(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x1,
6.14) ^ swrlb:greaterThan(?x2, 3.89) ^ swrlb:lessThanOrEqual(?x4, 1.70) ->
Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4,
1.70) ^ swrlb:lessThanOrEqual(?x2, 3.18) ^ swrlb:lessThanOrEqual(?x3, 4.00)
-> Class_2(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.18) ^ swrlb:greaterThan(?x4, 1.70) ^ swrlb:lessThanOrEqual(?x3, 4.00) ->
Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x3,
4.00) ^ swrlb:greaterThan(?x4, 1.70) ^ swrlb:lessThanOrEqual(?x2, 3.80) ->
Class_2(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.80) ^ swrlb:greaterThan(?x3, 4.00) ^ swrlb:greaterThan(?x4, 1.70) ->
Class_2(?p)
Optimized SWRL Rules (Intra-Rule Redundancy Removed):
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x1,
6.14) ^ swrlb:lessThanOrEqual(?x2, 3.08) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_1(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.08) ^ swrlb:lessThanOrEqual(?x1, 6.14) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_0(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x1,
6.14) ^ swrlb:lessThanOrEqual(?x2, 3.89) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_1(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x1,
6.14) ^ swrlb:greaterThan(?x2, 3.89) ^ swrlb:lessThanOrEqual(?x4, 1.70) ->
Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4,
1.70) ^ swrlb:lessThanOrEqual(?x2, 3.18) ^ swrlb:lessThanOrEqual(?x3, 4.00)
-> Class_2(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.18) ^ swrlb:greaterThan(?x4, 1.70) ^ swrlb:lessThanOrEqual(?x3, 4.00) ->
Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x3,
4.00) ^ swrlb:greaterThan(?x4, 1.70) ^ swrlb:lessThanOrEqual(?x2, 3.80) ->
Class_2(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.80) ^ swrlb:greaterThan(?x3, 4.00) ^ swrlb:greaterThan(?x4, 1.70) ->
Class_2(?p)
Optimized SWRL Rules (Inter-Rule Redundancy Removed):
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x1,
6.14) ^ swrlb:lessThanOrEqual(?x2, 3.08) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_1(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.08) ^ swrlb:lessThanOrEqual(?x1, 6.14) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_0(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x1,
6.14) ^ swrlb:lessThanOrEqual(?x2, 3.89) ^ swrlb:lessThanOrEqual(?x4, 1.70)
-> Class_1(?p)
Unclassified(?p)
^ hasF0(?p, ?x1) ^ hasF1(?p, ?x2) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x1,
6.14) ^ swrlb:greaterThan(?x2, 3.89) ^ swrlb:lessThanOrEqual(?x4, 1.70) ->
Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4,
1.70) ^ swrlb:lessThanOrEqual(?x2, 3.18) ^ swrlb:lessThanOrEqual(?x3, 4.00)
-> Class_2(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.18) ^ swrlb:greaterThan(?x4, 1.70) ^ swrlb:lessThanOrEqual(?x3, 4.00) ->
Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x3,
4.00) ^ swrlb:greaterThan(?x4, 1.70) ^ swrlb:lessThanOrEqual(?x2, 3.80) ->
Class_2(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ hasF2(?p, ?x3) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x2,
3.80) ^ swrlb:greaterThan(?x3, 4.00) ^ swrlb:greaterThan(?x4, 1.70) ->
Class_2(?p)
Protégé-Friendly SWRL Rules:
Unclassified_Iris(?p),
sepal_length(?p, ?x1), sepal_width(?p, ?x2), petal_width(?p, ?x4),
lessThanOrEqual(?x1, 6.14), lessThanOrEqual(?x2, 3.08), lessThanOrEqual(?x4,
1.70) -> Versicolor(?p)
Unclassified_Iris(?p),
sepal_length(?p, ?x1), sepal_width(?p, ?x2), petal_width(?p, ?x4),
greaterThan(?x2, 3.08), lessThanOrEqual(?x1, 6.14), lessThanOrEqual(?x4, 1.70)
-> Setosa(?p)
Unclassified_Iris(?p),
sepal_length(?p, ?x1), sepal_width(?p, ?x2), petal_width(?p, ?x4),
greaterThan(?x1, 6.14), lessThanOrEqual(?x2, 3.89), lessThanOrEqual(?x4, 1.70)
-> Versicolor(?p)
Unclassified_Iris(?p),
sepal_length(?p, ?x1), sepal_width(?p, ?x2), petal_width(?p, ?x4),
greaterThan(?x1, 6.14), greaterThan(?x2, 3.89), lessThanOrEqual(?x4, 1.70)
-> Versicolor(?p)
Unclassified_Iris(?p),
sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4),
greaterThan(?x4, 1.70), lessThanOrEqual(?x2, 3.18), lessThanOrEqual(?x3, 4.00)
-> Virginica(?p)
Unclassified_Iris(?p),
sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4),
greaterThan(?x2, 3.18), greaterThan(?x4, 1.70), lessThanOrEqual(?x3, 4.00)
-> Setosa(?p)
Unclassified_Iris(?p),
sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4),
greaterThan(?x3, 4.00), greaterThan(?x4, 1.70), lessThanOrEqual(?x2, 3.80)
-> Virginica(?p)
Unclassified_Iris(?p),
sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4),
greaterThan(?x2, 3.80), greaterThan(?x3, 4.00), greaterThan(?x4, 1.70) -> Virginica(?p)
MIN-MAX
ranges for each Iris attribute are:
sepal length (f0): min = 4.3, max = 7.9
sepal width (f1): min = 2.0, max = 4.4
petal length (f2): min = 1.0, max = 6.9
petal width (f3): min = 0.1, max = 2.5
ChatGPT Interpretations and Explanations of the Rule
Set:
Rule 1:
If a flower has:
•
Sepal
length ≤ 6.14 cm
•
Sepal
width ≤ 3.08 cm
•
Petal
width ≤ 1.70 cm
Then it is classified as Versicolor.
Interpretation: Flowers with a relatively short sepal length (≤
6.14 cm), narrower sepals (≤ 3.08 cm), and small petal width (≤
1.70 cm) are identified as Iris versicolor.
Rule 2:
If a flower has:
•
Sepal
length ≤ 6.14 cm
•
Sepal
width > 3.08 cm
•
Petal
width ≤ 1.70 cm
Then it is classified as Setosa.
Interpretation: Among flowers with small petal width (≤ 1.70
cm) and shorter sepal length (≤ 6.14 cm), those with wider sepals (>
3.08 cm) are classified as setosa instead of versicolor.
Rule 3:
If a flower has:
•
Sepal length
> 6.14 cm
•
Sepal
width ≤ 3.89 cm
•
Petal
width ≤ 1.70 cm
Then it is classified as Versicolor.
Interpretation: When a flower has a longer sepal length (> 6.14 cm), but still maintains a moderate sepal width (≤ 3.89
cm) and small petal width (≤ 1.70 cm), it is classified as versicolor.
Rule 4:
If a flower has:
•
Sepal
length > 6.14 cm
•
Sepal
width > 3.89 cm
•
Petal
width ≤ 1.70 cm
Then it is classified as Versicolor.
Interpretation: Even if the sepal width increases beyond 3.89 cm, as
long as the petal width remains small (≤ 1.70 cm), the flower remains
classified as versicolor.
Rule 5:
If a flower has:
•
Sepal
width ≤ 3.18 cm
•
Petal
length ≤ 4.00 cm
•
Petal
width > 1.70 cm
Then it is classified as Virginica.
Interpretation: Flowers with wider petals (> 1.70 cm) but
relatively short sepals (≤ 3.18 cm) and short petals (≤ 4.00 cm)
belong to the virginica class.
Rule 6:
If a flower has:
•
Sepal
width > 3.18 cm
•
Petal
length ≤ 4.00 cm
•
Petal
width > 1.70 cm
Then it is classified as Setosa.
Interpretation: Among flowers with wide petals (> 1.70 cm) and
short petals (≤ 4.00 cm), those with a wider sepal (> 3.18 cm) are
classified as setosa instead of virginica.
Rule 7:
If a flower has:
•
Sepal
width ≤ 3.80 cm
•
Petal
length > 4.00 cm
•
Petal
width > 1.70 cm
Then it is classified as Virginica.
Interpretation: If the petal length is relatively long (> 4.00 cm)
and the petal width is also large (> 1.70 cm), but the sepal width is not
too wide (≤ 3.80 cm), then the flower is classified as virginica.
Rule 8:
If a flower has:
•
Sepal
width > 3.80 cm
•
Petal
length > 4.00 cm
•
Petal
width > 1.70 cm
Then it is classified as Virginica.
Interpretation: Even if the sepal width is wider than 3.80 cm, as long as the petals are long (> 4.00 cm) and wide (>
1.70 cm), the flower is still classified as virginica.
Rule set analysis
The given SWRL rule set for classifying
Iris flowers based on sepal and petal measurements exhibits a structured
approach to decision-making. Below is an analysis of its consistency,
completeness, and non-redundancy.
Consistency: The rule set appears logically consistent, meaning
that no two conflicting rules classify the same instance into different classes
under identical conditions. Each rule follows a structured decision path,
ensuring that a flower satisfying specific constraints is assigned to exactly
one class. The thresholds used for feature comparisons are clearly defined and
mutually exclusive where needed, preventing contradictions in classification.
Completeness: The rule set covers a wide range of conditions
across different feature values, ensuring that flowers from all three Iris
species (setosa, versicolor, and virginica) are classified. However,
completeness depends on whether all possible feature combinations are accounted
for. While most relevant feature intervals seem covered, there may be some
unclassified edge cases, particularly if a flower falls outside the explicitly
defined constraints. This could lead to some instances remaining unclassified,
necessitating an additional fallback rule or refinement to ensure total
coverage.
Non-redundancy: The rules are mostly non-redundant, as each rule
contributes to a specific decision boundary. However, there is some potential
redundancy in the classification of versicolor when sepal width and petal width
constraints are relaxed (Rules 1, 3, and 4). Rules 3 and 4 both classify
flowers as versicolor when petal width is ≤ 1.70 cm, but Rule 4 does so
regardless of sepal width. This suggests that one of these conditions might be
unnecessary. Similarly, Rules 7 and 8 both classify flowers as virginica, with
Rule 8 being more general due to the relaxed constraint on sepal width, making
Rule 7 potentially redundant.
Therefore, the rule set is logically
consistent and generally complete, though minor refinements may be needed to
ensure every possible case is covered. While largely non-redundant, some rules
might be simplified or merged without altering classification accuracy. These
refinements would enhance the efficiency of the rule set without compromising
correctness.
Explainability
See below how ChatGPT, as an Explainable
AI, classifies each of the five test samples based on the given SWRL rule set
and provides an explanation of the decision-making process:
Test Sample 1:
{sepal length (cm) = 5.8; sepal width (cm) = 3.0; petal length (cm) =
4.2; petal width (cm) = 1.3}
Classification: Versicolor
Explanation: The sepal length (5.8 cm) is ≤ 6.14 cm. The sepal
width (3.0 cm) is ≤ 3.08 cm. The petal width (1.3 cm) is ≤ 1.70 cm.
These values match the conditions in Rule 1, which classifies the flower as
versicolor.
Interpretation: Since the sepal length and width fall
within the threshold and the petal width is relatively small, the flower is
classified as Iris versicolor.
Test Sample 2:
{sepal length (cm) = 6.5; sepal width (cm) = 3.5; petal length (cm) =
5.0; petal width (cm) = 1.6}
Classification: Versicolor
Explanation: The sepal length (6.5 cm) is > 6.14 cm. The sepal
width (3.5 cm) is ≤ 3.89 cm.
The petal width (1.6 cm) is ≤ 1.70
cm. These values match Rule 3, which classifies the flower as Versicolor. Interpretation: The flower has a
slightly larger sepal length but remains within the range where Versicolor is
assigned, as petal width and sepal width still meet the necessary conditions.
Test Sample 3:
{sepal length (cm) = 5.5; sepal width (cm) = 3.5;
petal length (cm) = 1.5; petal width (cm) = 0.2}
Classification: Setosa
Explanation: The sepal length (5.5 cm) is ≤ 6.14 cm. The
sepal width (3.5 cm) is > 3.08 cm. The petal width (0.2 cm) is ≤ 1.70
cm. These values match Rule 2, which classifies the flower as setosa.
Interpretation: The flower has a relatively small petal width and
falls into the region where Iris setosa is typically found. The decision is
mainly influenced by the fact that sepal width exceeds 3.08 cm while the other
conditions remain in an acceptable range.
Test Sample 4:
{sepal length (cm) = 6.8; sepal width (cm) = 3.2; petal length (cm) =
5.5; petal width (cm) = 2.2}
Classification: Virginica
Explanation: The sepal width (3.2 cm) is ≤ 3.80 cm. The
petal length (5.5 cm) is > 4.00 cm.
The petal width (2.2 cm) is > 1.70
cm. These values match Rule 8, which classifies the flower as virginica.
Interpretation: The larger petal width and petal
length contribute significantly to classifying this flower as Iris virginica,
aligning with the expected characteristics of this species.
Test Sample 5:
{sepal length (cm) = 5.9; sepal width (cm) = 3.4;
petal length (cm) = 4.8; petal width (cm) = 1.8}
Classification: Not classified by current rules
Explanation: The petal width (1.8 cm) is greater than the 1.70 cm
threshold used in all versicolor classification rules. The petal length (4.8
cm) is within the range of versicolor, but there is no existing rule covering
this combination. The current rules do not specify a condition for flowers with
petal width > 1.70 cm and petal length ≤ 4.89 cm, meaning this case
remains unclassified.
Interpretation: This test sample falls outside the
explicit classification boundaries of the rule set. The rules may need an
additional condition to handle such borderline cases.
Through these classifications, it is
demonstrated how the rule set systematically determines the Iris species based
on the given measurements. However, borderline cases highlight potential gaps
in the rules that originate from the limit imposed on the decision tree's
depth hyperparameter, suggesting areas for refinement.
In the IRIS ontology (http://ai.it.jyu.fi/vagan/ontologies/2024/SWRL.owl), all unlabelled flower samples
initially belong to the upper class “Unclassified”. A reasoner in Protégé
executes the given rule set, systematically evaluating each sample based on
sepal and petal measurements. When a sample satisfies the conditions of a rule,
it is moved from the “Unclassified” category to one of the three more specific
subclasses: Setosa, Versicolor, or Virginica. This classification process
relies entirely on the defined rules to determine the most appropriate category
for each instance. However, the rule-based classification is inherently
constrained by the conditions set within the ontology. If a sample’s
characteristics do not match any of the existing rules, it remains in the “Unclassified”
category, as no logical inference can be made to assign it to a more specific
subclass. An example of this occurs with Test Sample 5, which has the following
measurements:
•
Sepal
Length: 5.9 cm
•
Sepal
Width: 3.4 cm
•
Petal
Length: 4.8 cm
•
Petal
Width: 1.8 cm
Upon evaluation, Test Sample 5 does not satisfy the
conditions for any of the defined classification rules. The key reason lies in
its petal width (1.8 cm), which exceeds the threshold of 1.70 cm used in all
versicolor rules, yet does not meet the requirements for virginica
classification, which typically considers petal width and length together.
Since none of the rules explicitly account for flowers with this specific
combination of characteristics, the reasoner does not have sufficient
information to place it within any of the three subclasses. This situation
highlights a potential incompleteness in the rule set. If remaining
unclassified instances like Test Sample 5 are undesirable, an additional rule
or refinement could be introduced to better capture borderline cases, ensuring
a more exhaustive classification process within the ontology.
This situation highlights a natural
consequence of the decision tree depth limitation (3) imposed in our
experiments. Given this constraint, certain regions of the feature space may
remain unclassified, as the rules are derived from a decision tree of limited
depth. Rather than being an indicator of incompleteness in the rule set, this
reflects a controlled trade-off between classification granularity and
interpretability. The unclassified areas represent legal decision spaces where
further refinement would require deeper rule structures. In practical
applications, such unclassified cases could either be handled by additional
rules, a fallback mechanism, or left intentionally ambiguous to maintain the
simplicity and transparency of the ontology.
The resulting
ontology is available at:
http://ai.it.jyu.fi/vagan/ontologies/2024/SWRL.owl
# EXPERIMENTS
WITH DECISION TREE DEPTH=4
import numpy as np
import pandas as pd
from sklearn.datasets
import load_iris
from sklearn.model_selection
import train_test_split
from sklearn.preprocessing
import StandardScaler
from sklearn.neural_network
import MLPClassifier
from sklearn.tree
import DecisionTreeClassifier,
export_text
import re
def load_and_process_data():
    """Load Iris, build the naming maps, and return a standardized train/test split.

    Returns:
        A 10-tuple ``(X_train, X_test, y_train, y_test, feature_mapping,
        class_mapping, n_features, scaler, X, y)`` where

        * ``X_train`` / ``X_test`` are the standardized 80/20 split
          (``random_state=42``); the scaler is fitted on the training part only,
        * ``feature_mapping`` maps the tree feature id ``"f<i>"`` to
          ``("hasF<i>", <original feature name>)``,
        * ``class_mapping`` maps ``"class: <i>"`` to
          ``("Class_<i>", <original class name>)``,
        * ``X`` / ``y`` are the full, unscaled dataset.
    """
    dataset = load_iris()
    X, y = dataset.data, dataset.target

    # Decision-tree feature id -> (SWRL property name, original column name).
    feature_mapping = {}
    for idx, original_name in enumerate(dataset.feature_names):
        feature_mapping[f"f{idx}"] = (f"hasF{idx}", original_name)

    # Decision-tree class label -> (SWRL class name, original class name).
    class_mapping = {}
    for idx, original_name in enumerate(dataset.target_names):
        class_mapping[f"class: {idx}"] = (f"Class_{idx}", original_name)

    raw_train, raw_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(raw_train)
    X_test = scaler.transform(raw_test)

    return (
        X_train, X_test, y_train, y_test,
        feature_mapping, class_mapping,
        X.shape[1], scaler, X, y,
    )
def train_models(X_train, X_test, y_train, y_test, X_shape, scaler, X_original, y_original,
                 num_synthetic_samples=None, max_depth=4):
    """Train the neural network and a decision-tree surrogate that imitates it.

    The network is fitted on the scaled training data. Synthetic points are
    then drawn uniformly inside the per-feature range of the training data
    (in original units), labelled by the network, and used to fit a decision
    tree on their scaled coordinates.

    Args:
        X_train, X_test: Scaled feature matrices.
        y_train, y_test: Matching labels.
        X_shape: Number of features.
        scaler: Fitted scaler providing ``transform`` / ``inverse_transform``.
        X_original, y_original: Full unscaled dataset, used only to report the
            surrogate tree's accuracy.
        num_synthetic_samples: Number of synthetic samples to collect *per
            class*. When falsy, ``len(X_train) // n_classes`` is used.
        max_depth: Maximum depth of the surrogate decision tree.

    Returns:
        ``(clf, mlp, X_synthetic, scaler)`` where ``X_synthetic`` holds the
        accepted synthetic samples in original (unscaled) units.
    """
    mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=2000, random_state=42, alpha=0.01)
    mlp.fit(X_train, y_train)
    print(f'NN Test Accuracy: {mlp.score(X_test, y_test):.4f}')

    # ---------- Refined Stage 2: Generate Synthetic Samples ----------
    def generate_synthetic_samples(model, X_shape, scaler, num_samples_per_class):
        """Rejection-sample a class-balanced synthetic set labelled by ``model``.

        NOTE: draws come from the global NumPy RNG, which this function does
        not seed, so the synthetic set may differ between runs.
        NOTE(review): the loop only ends once every class has reached the
        target; it would not terminate if the model never predicted some class
        inside the sampled box.
        """
        # Per-feature min/max of the training data, expressed in original units.
        train_original = scaler.inverse_transform(X_train)
        lows = train_original.min(axis=0)
        highs = train_original.max(axis=0)

        n_classes = len(model.classes_)
        class_counts = [0] * n_classes  # accepted samples per class
        target_count = num_samples_per_class if num_samples_per_class else len(X_train) // n_classes

        synthetic_samples = []
        synthetic_labels = []
        # Label candidates in batches: one predict_proba call per batch instead
        # of one per sample.
        batch_size = max(256, target_count)
        while min(class_counts) < target_count:
            candidates = np.random.uniform(lows, highs, size=(batch_size, X_shape))
            probabilities = model.predict_proba(scaler.transform(candidates))
            # argmax yields the column index of the most probable class; for
            # labels 0..n_classes-1 (as in Iris) this equals the label itself.
            predicted = np.argmax(probabilities, axis=1)
            for sample, label in zip(candidates, predicted):
                label = int(label)
                # Keep the sample only while its class is still under target.
                if class_counts[label] < target_count:
                    synthetic_samples.append(sample)
                    synthetic_labels.append(label)
                    class_counts[label] += 1

        return np.array(synthetic_samples), np.array(synthetic_labels)

    # Generate synthetic data
    X_synthetic, y_synthetic = generate_synthetic_samples(mlp, X_shape, scaler, num_synthetic_samples)

    # Print the distribution of labels in the synthetic data
    print(f"Synthetic data class distribution: {np.unique(y_synthetic, return_counts=True)}")

    # Print the predictions of the NN in the training data
    y_train_pred = mlp.predict(X_train)
    print(f"NN training data class distribution: {np.unique(y_train_pred, return_counts=True)}")
    # ---------- End of Refined Stage 2 ----------

    # The tree is trained in the scaled space; thresholds are mapped back to
    # original units later, when the SWRL rules are generated.
    X_synthetic_scaled = scaler.transform(X_synthetic)
    clf = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    clf.fit(X_synthetic_scaled, y_synthetic)

    # Evaluate the decision tree on the original data
    X_original_scaled = scaler.transform(X_original)
    dt_accuracy = clf.score(X_original_scaled, y_original)
    print(f"Decision Tree Accuracy on Original Iris Data: {dt_accuracy:.4f}")

    return clf, mlp, X_synthetic, scaler
def generate_swrl_rules(clf, feature_names, class_mapping, scaler):
    """Generate one SWRL rule per leaf of a fitted decision tree.

    Args:
        clf: Fitted tree classifier; only ``clf.tree_`` (``children_left``,
            ``children_right``, ``feature``, ``threshold``, ``value``) is read.
        feature_names: One name per feature. ``"f0"`` becomes the property
            ``hasF0`` and is bound to the variable ``?x1``.
        class_mapping: Maps ``"class: <index>"`` to a tuple whose first item is
            the SWRL class name.
        scaler: Object whose ``inverse_transform`` maps tree thresholds back to
            the original feature scale.

    Returns:
        A list of rule strings of the form
        ``Unclassified(?p) ^ <conditions> -> <Class>(?p)`` with the conditions
        sorted alphabetically and thresholds printed with two decimals.
    """
    tree = clf.tree_
    n_features = len(feature_names)
    rules = []

    def to_original_scale(feature_index, threshold):
        # inverse_transform works on full feature vectors, so embed the
        # threshold in an otherwise-zero row and read the same column back.
        reference_vector = np.zeros((1, n_features))
        reference_vector[0][feature_index] = threshold
        return scaler.inverse_transform(reference_vector)[0][feature_index]

    def recurse(node, conditions, bound_variables):
        left = tree.children_left[node]
        right = tree.children_right[node]

        if left == -1 and right == -1:  # leaf: emit the rule for this path
            predicted_class = np.argmax(tree.value[node])
            class_name = class_mapping[f"class: {predicted_class}"][0]
            # Joining the full atom list avoids a dangling " ^ " when the tree
            # is a single leaf and the path has no conditions.
            body = ["Unclassified(?p)"] + sorted(conditions)
            rules.append(" ^ ".join(body) + f' -> {class_name}(?p)')
            return

        feature_index = tree.feature[node]
        var = f'?x{feature_index + 1}'

        # Bind the variable to its data property once per path.
        if var not in bound_variables:
            property_name = feature_names[feature_index].capitalize().replace(" ", "")
            conditions = conditions + [f'has{property_name}(?p, {var})']
            bound_variables = bound_variables | {var}

        threshold = to_original_scale(feature_index, tree.threshold[node])
        recurse(left, conditions + [f'swrlb:lessThanOrEqual({var}, {threshold:.2f})'], bound_variables)
        recurse(right, conditions + [f'swrlb:greaterThan({var}, {threshold:.2f})'], bound_variables)

    recurse(0, [], frozenset())
    return rules
# Matches a numeric SWRL comparison such as "swrlb:greaterThan(?x3, 4.08)".
_SWRL_COMPARISON_RE = re.compile(
    r'swrlb:(?P<op>lessThanOrEqual|greaterThanOrEqual|lessThan|greaterThan)'
    r'\((?P<variable>\?x\d+),\s*(?P<value>[+-]?\d+\.?\d*)\)'
)


def optimize_swrl_rule(rule):
    """Remove redundant numeric conditions inside a single SWRL rule.

    For every (variable, operator) pair only the tightest comparison is kept:
    the smallest value for ``lessThan`` / ``lessThanOrEqual`` and the largest
    for ``greaterThan`` / ``greaterThanOrEqual``. When values tie, the first
    condition encountered wins.

    Args:
        rule: Rule text of the form ``<atoms joined by " ^ "> -> <head>``.

    Returns:
        The rule rebuilt as ``Unclassified(?p) ^ <sorted conditions> -> <head>``.
        Only atoms starting with ``has`` or ``swrlb:`` are carried over.
        ``swrlb:`` atoms that are not recognised numeric comparisons are kept
        verbatim, because dropping them would silently widen the rule. A rule
        that does not contain exactly one ``->`` is returned unchanged.
    """
    parts = rule.split('->')
    if len(parts) != 2:
        return rule  # No single head to rebuild around; leave untouched.

    body = parts[0].strip()
    head = parts[1].strip()
    atoms = body.split(' ^ ')

    has_conditions = [atom for atom in atoms if atom.startswith('has')]

    candidates = {}  # (variable, operator) -> [(value, original condition text)]
    unparsed = []    # swrlb: atoms we do not understand; preserved as-is
    for atom in atoms:
        if not atom.startswith('swrlb:'):
            continue
        match = _SWRL_COMPARISON_RE.match(atom)
        if match is None:
            unparsed.append(atom)
            continue
        key = (match.group('variable'), match.group('op'))
        candidates.setdefault(key, []).append((float(match.group('value')), atom))

    tightest = []
    for (_, op), options in candidates.items():
        # Upper bounds tighten downwards, lower bounds tighten upwards.
        # min()/max() return the first extreme item, so ties keep the first.
        choose = min if op.startswith('less') else max
        tightest.append(choose(options, key=lambda option: option[0])[1])

    conditions = sorted(has_conditions + tightest + unparsed)
    # Joining the full atom list avoids a dangling " ^ " for an empty body.
    return " ^ ".join(["Unclassified(?p)"] + conditions) + " -> " + head
def optimize_inter_swrl_rules(rules):
    """Drop rules that are strictly more specific than another rule with the same head.

    A rule is redundant when some other rule with the same conclusion fires on
    every individual the first rule fires on, and the two are not equivalent.
    Rule A implies rule B when A contains every non-numeric atom of B and, for
    each numeric bound in B, A has a bound on the same variable and in the same
    direction that is at least as tight.

    Equivalent rules (including exact duplicates) are all kept, and a rule
    without ``->`` is passed through unchanged.

    Args:
        rules: Rule strings of the form ``<atoms joined by " ^ "> -> <head>``.

    Returns:
        The non-redundant rules, in their original order.
    """
    comparison_re = re.compile(
        r'swrlb:(?P<op>lessThanOrEqual|greaterThanOrEqual|lessThan|greaterThan)'
        r'\((?P<variable>\?x\d+),\s*(?P<value>[+-]?\d+\.?\d*)\)'
    )

    def parse(rule):
        """Return ``(head, atoms, upper, lower)`` or ``None`` if there is no head.

        ``upper`` / ``lower`` map a variable to its tightest bound, encoded as
        a key for which *smaller means tighter*: ``(value, inclusive)`` for
        upper bounds and ``(-value, inclusive)`` for lower bounds. A strict
        bound (``inclusive=False``) is tighter than an inclusive one at the
        same value.
        """
        body, arrow, head = rule.partition("->")
        if not arrow:
            return None
        atoms = set()
        upper, lower = {}, {}
        for part in body.split(" ^ "):
            part = part.strip()
            if not part or part == "Unclassified(?p)":
                continue  # shared guard atom, irrelevant for comparison
            match = comparison_re.fullmatch(part)
            if match is None:
                atoms.add(part)
                continue
            op = match.group("op")
            value = float(match.group("value"))
            inclusive = op.endswith("OrEqual")
            if op.startswith("less"):
                side, key = upper, (value, inclusive)
            else:
                side, key = lower, (-value, inclusive)
            variable = match.group("variable")
            if variable not in side or key < side[variable]:
                side[variable] = key
        return head.strip(), atoms, upper, lower

    def implies(specific, general):
        """True when every individual matching ``specific`` also matches ``general``."""
        if not general[1] <= specific[1]:
            return False
        for side in (2, 3):  # upper bounds, then lower bounds
            for variable, bound in general[side].items():
                own = specific[side].get(variable)
                if own is None or own > bound:
                    return False
        return True

    parsed = [parse(rule) for rule in rules]

    optimized_rules = []
    for i, rule in enumerate(rules):
        current = parsed[i]
        is_redundant = current is not None and any(
            j != i
            and other is not None
            and other[0] == current[0]
            and implies(current, other)
            # Strictness: never let two equivalent rules eliminate each other.
            and not implies(other, current)
            for j, other in enumerate(parsed)
        )
        if not is_redundant:
            optimized_rules.append(rule)
    return optimized_rules
# Main execution: load data, train the NN and its surrogate tree, then derive
# and progressively simplify the SWRL rules, printing every intermediate stage.
(
    X_train, X_test, y_train, y_test,
    feature_mapping, class_mapping,
    X_shape, scaler, X_original, y_original,
) = load_and_process_data()

clf, mlp, X_synthetic, scaler = train_models(
    X_train, X_test, y_train, y_test, X_shape, scaler,
    X_original, y_original,
    num_synthetic_samples=10000,
)

# Decision tree in scikit-learn's textual form, using generic f<i> feature ids.
print('\nExtracted Decision Tree Rules:\n')
print(export_text(clf, feature_names=[f"f{i}" for i in range(X_synthetic.shape[1])]))

# Legend: tree identifier -> SWRL name -> original dataset name.
for key, (swrl_name, actual_name) in feature_mapping.items():
    print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of attribute in the original dataset);")
print('\n')
for key, (swrl_name, actual_name) in class_mapping.items():
    print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of class in the original dataset);")

# Stage 1: one raw rule per tree leaf.
swrl_rules = generate_swrl_rules(
    clf,
    [f"f{i}" for i in range(X_synthetic.shape[1])],
    class_mapping,
    scaler,
)
print('\nGenerated SWRL Rules:\n')
for rule in swrl_rules:
    print(rule)

# Stage 2: simplify each rule on its own.
optimized_rules = [optimize_swrl_rule(rule) for rule in swrl_rules]
print('\nOptimized SWRL Rules (Intra-Rule Redundancy Removed):\n')
for rule in optimized_rules:
    print(rule)

# Stage 3: remove rules made redundant by other rules.
optimized_rules = optimize_inter_swrl_rules(optimized_rules)
print('\nOptimized SWRL Rules (Inter-Rule Redundancy Removed):\n')
for rule in optimized_rules:
    print(rule)
NN Test Accuracy: 1.0000
Synthetic data class
distribution: (array([0, 1, 2]), array([400, 400,
400]))
NN training data class distribution:
(array([0, 1, 2]), array([40, 39, 41]))
Decision Tree Accuracy
on Original Iris Data: 0.8733 / 0.8867 (10000) / 0.8933 (100000) / 0.9000
(500000)
Extracted Decision Tree Rules
(before denormalization):
|--- f3 <= 0.34
|   |--- f1 <= -0.00
|   |   |--- f0 <= -0.89
|   |   |   |--- f2 <= -0.19
|   |   |   |   |--- Setosa
|   |   |   |--- f2 > -0.19
|   |   |   |   |--- Versicolor
|   |   |--- f0 > -0.89
|   |   |   |--- f2 <= 1.23
|   |   |   |   |--- Versicolor
|   |   |   |--- f2 > 1.23
|   |   |   |   |--- Virginica
|   |--- f1 > -0.00
|   |   |--- f0 <= 0.31
|   |   |   |--- f2 <= 1.23
|   |   |   |   |--- Setosa
|   |   |   |--- f2 > 1.23
|   |   |   |   |--- Setosa
|   |   |--- f0 > 0.31
|   |   |   |--- f1 <= 2.30
|   |   |   |   |--- Versicolor
|   |   |   |--- f1 > 2.30
|   |   |   |   |--- Setosa
|--- f3 > 0.34
|   |--- f2 <= 0.20
|   |   |--- f0 <= 0.53
|   |   |   |--- f1 <= 0.07
|   |   |   |   |--- Virginica
|   |   |   |--- f1 > 0.07
|   |   |   |   |--- Setosa
|   |   |--- f0 > 0.53
|   |   |   |--- f3 <= 1.62
|   |   |   |   |--- Versicolor
|   |   |   |--- f3 > 1.62
|   |   |   |   |--- Virginica
|   |--- f2 > 0.20
|   |   |--- f1 <= 1.58
|   |   |   |--- f1 <= 0.28
|   |   |   |   |--- Virginica
|   |   |   |--- f1 > 0.28
|   |   |   |   |--- Virginica
|   |   |--- f1 > 1.58
|   |   |   |--- f3 <= 1.14
|   |   |   |   |--- Setosa
|   |   |   |--- f3 > 1.14
|   |   |   |   |--- Virginica
sepal length (f0): min = 4.3, max = 7.9
sepal width (f1):
min = 2.0, max = 4.4
petal length (f2): min = 1.0, max = 6.9
petal width (f3):
min = 0.1, max = 2.5
class: 0 (Decision tree); Class_0 (SWRL rules); setosa (name of class in
the original dataset);
class: 1 (Decision tree); Class_1 (SWRL rules); versicolor (name of
class in the original dataset);
class: 2 (Decision tree); Class_2 (SWRL rules); virginica (name of class
in the original dataset);
Rule 1: (PW
≤ 1.44), (SW ≤ 3.06), (SL ≤ 5.08), (PL ≤ 3.40) ->
Setosa
Rule 2: (PW
≤ 1.44), (SW ≤ 3.06), (SL ≤ 5.08), (PL > 3.40) ->
Versicolor
Rule 3: (PW
≤ 1.44), (SW ≤ 3.06), (SL > 5.08), (PL ≤ 5.86) ->
Versicolor
Rule 4: (PW
≤ 1.44), (SW ≤ 3.06), (SL > 5.08), (PL > 5.86) ->
Virginica
Rule 5: (PW ≤ 1.44), (SW > 3.06), (SL ≤ 6.06), (PL
≤ 5.86) -> Setosa
Rule 6: (PW ≤ 1.44), (SW > 3.06), (SL ≤ 6.06), (PL
> 5.86) -> Setosa
Rule 7: (PW
≤ 1.44), (SW > 3.06), (SL > 6.06), (SW ≤ 4.09) ->
Versicolor
Rule 8: (PW
≤ 1.44), (SW > 4.09), (SL > 6.06), … ->
Setosa
Rule 9: (PW
> 1.44), (PL ≤ 4.08), (SL ≤ 6.25), (SW ≤ 3.09) ->
Virginica
Rule 10: (PW
> 1.44), (PL ≤ 4.08), (SL ≤ 6.25), (SW
> 3.09) -> Setosa
Rule 11: (PW
> 1.44), (PL ≤ 4.08), (SL > 6.25), (PW ≤ 2.40) ->
Versicolor
Rule 12:
… , (PL
≤ 4.08), (SL > 6.25), (PW > 2.40) -> Virginica
Rule 13: (PW > 1.44), (PL > 4.08), (SW ≤ 3.19), … -> Virginica
Rule 14: (PW > 1.44), (PL > 4.08), (3.19
< SW ≤ 3.77) -> Virginica
Rule 15: (PW
> 1.44), (PL > 4.08), (SW > 3.77), (PW ≤ 2.04) -> Setosa
Rule 16: …,
(PL > 4.08), (SW > 3.77), (PW > 2.04), -> Virginica
PW (petal width): min = 0.1, max = 2.5
SW (sepal width): min = 2.0, max = 4.4
PL (petal length): min = 1.0, max = 6.9
SL (sepal length): min = 4.3, max = 7.9
Extracted Decision Tree Rules
(after denormalization):
|--- PW ≤ 1.44
|   |--- SW ≤ 3.06
|   |   |--- SL ≤ 5.08
|   |   |   |--- PL ≤ 3.40
|   |   |   |   |--- Setosa (Rule 1)
|   |   |   |--- PL > 3.40
|   |   |   |   |--- Versicolor (Rule 2)
|   |   |--- SL > 5.08
|   |   |   |--- PL ≤ 5.86
|   |   |   |   |--- Versicolor (Rule 3)
|   |   |   |--- PL > 5.86
|   |   |   |   |--- Virginica (Rule 4)
|   |--- SW > 3.06
|   |   |--- SL ≤ 6.06
|   |   |   |--- PL ≤ 5.86
|   |   |   |   |--- Setosa (Rule 5)
|   |   |   |--- PL > 5.86
|   |   |   |   |--- Setosa (Rule 6)
|   |   |--- SL > 6.06
|   |   |   |--- SW ≤ 4.09
|   |   |   |   |--- Versicolor (Rule 7)
|   |   |   |--- SW > 4.09
|   |   |   |   |--- Setosa (Rule 8)
|--- PW > 1.44
|   |--- PL ≤ 4.08
|   |   |--- SL ≤ 6.25
|   |   |   |--- SW ≤ 3.09
|   |   |   |   |--- Virginica (Rule 9)
|   |   |   |--- SW > 3.09
|   |   |   |   |--- Setosa (Rule 10)
|   |   |--- SL > 6.25
|   |   |   |--- PW ≤ 2.40
|   |   |   |   |--- Versicolor (Rule 11)
|   |   |   |--- PW > 2.40
|   |   |   |   |--- Virginica (Rule 12)
|   |--- PL > 4.08
|   |   |--- SW ≤ 3.77
|   |   |   |--- SW ≤ 3.19
|   |   |   |   |--- Virginica (Rule 13)
|   |   |   |--- SW > 3.19
|   |   |   |   |--- Virginica (Rule 14)
|   |   |--- SW > 3.77
|   |   |   |--- PW ≤ 2.04
|   |   |   |   |--- Setosa (Rule 15)
|   |   |   |--- PW > 2.04
|   |   |   |   |--- Virginica (Rule 16)
Rule 1: (PW
≤ 1.44), (SW ≤ 3.06), (SL ≤ 5.08), (PL ≤ 3.40) ->
Setosa
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), lessThanOrEqual(?SL, 5.08), lessThanOrEqual(?SW, 3.06),
lessThanOrEqual(?PL, 3.40), lessThanOrEqual(?PW, 1.44) -> Setosa(?p)
Rule 2: (PW
≤ 1.44), (SW ≤ 3.06), (SL ≤ 5.08), (PL > 3.40) ->
Versicolor
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?PL, 3.40), lessThanOrEqual(?SL, 5.08),
lessThanOrEqual(?SW, 3.06), lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Rule 3: (PW
≤ 1.44), (SW ≤ 3.06), (SL > 5.08), (PL ≤ 5.86) ->
Versicolor
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?SL, 5.08), lessThanOrEqual(?SW, 3.06),
lessThanOrEqual(?PL, 5.86), lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Rule 4: (PW
≤ 1.44), (SW ≤ 3.06), (SL > 5.08), (PL > 5.86) ->
Virginica
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?SL, 5.08), greaterThan(?PL, 5.86),
lessThanOrEqual(?SW, 3.06), lessThanOrEqual(?PW, 1.44) -> Virginica(?p)
Rule 5: (PW
≤ 1.44), (SW > 3.06), (SL ≤ 6.06), (PL ≤ 5.86) ->
Setosa
Rule 6: (PW
≤ 1.44), (SW > 3.06), (SL ≤ 6.06), (PL > 5.86) ->
Setosa
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_width(?p, ?PW),
greaterThan(?SW, 3.06), lessThanOrEqual(?SL, 6.06), lessThanOrEqual(?PW, 1.44)
-> Setosa(?p)
Rule 7: (PW
≤ 1.44), (SW > 3.06), (SL > 6.06), (SW ≤ 4.09) ->
Versicolor
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_width(?p, ?PW),
greaterThan(?SL, 6.06), greaterThan(?SW, 3.06), lessThanOrEqual(?SW, 4.09),
lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Rule 8: (PW
≤ 1.44), (SW > 4.09), (SL > 6.06), … ->
Setosa
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_width(?p, ?PW),
greaterThan(?SL, 6.06), greaterThan(?SW, 4.09), lessThanOrEqual(?PW, 1.44)
-> Setosa(?p)
Rule 9: (PW
> 1.44), (PL ≤ 4.08), (SL ≤ 6.25), (SW ≤ 3.09) ->
Virginica
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?PW, 1.44), lessThanOrEqual(?SL, 6.25),
lessThanOrEqual(?SW, 3.09), lessThanOrEqual(?PL, 4.08) -> Virginica(?p)
Rule 10: (PW
> 1.44), (PL ≤ 4.08), (SL ≤ 6.25), (SW > 3.09) -> Setosa
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?SW, 3.09), greaterThan(?PW, 1.44),
lessThanOrEqual(?SL, 6.25), lessThanOrEqual(?PL, 4.08) -> Setosa(?p)
Rule 11: (PW
> 1.44), (PL ≤ 4.08), (SL > 6.25), (PW ≤ 2.40) ->
Versicolor
Unclassified_Iris(?p),
sepal_length (?p, ?SL), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?SL, 6.25), greaterThan(?PW, 1.44), lessThanOrEqual(?PL, 4.08),
lessThanOrEqual(?PW, 2.40) -> Versicolor(?p)
Rule 12:
… , (PL
≤ 4.08), (SL > 6.25), (PW > 2.40) -> Virginica
Unclassified_Iris(?p),
sepal_length (?p, ?SL), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?SL, 6.25), greaterThan(?PW, 2.40), lessThanOrEqual(?PL, 4.08)
-> Virginica(?p)
Rule 13: (PW
> 1.44), (PL > 4.08), (SW ≤ 3.19), … ->
Virginica
Rule 14: (PW
> 1.44), (PL > 4.08), (3.19 < SW ≤ 3.77) ->
Virginica
Unclassified_Iris(?p),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?PL,
4.08), greaterThan(?PW, 1.44), lessThanOrEqual(?SW, 3.77) -> Virginica(?p)
Rule 15: (PW
> 1.44), (PL > 4.08), (SW > 3.77), (PW ≤ 2.04) -> Setosa
Unclassified_Iris(?p),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SW,
3.77), greaterThan(?PL, 4.08), greaterThan(?PW, 1.44), lessThanOrEqual(?PW,
2.04) -> Setosa(?p)
Rule 16: …,
(PL > 4.08), (SW > 3.77), (PW > 2.04), -> Virginica
Unclassified_Iris(?p),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SW,
3.77), greaterThan(?PL, 4.08), greaterThan(?PW, 2.04) -> Virginica(?p)
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
Rule 1: (PW
≤ 1.44), (SW ≤ 3.06), (SL ≤ 5.08), (PL ≤ 3.40) ->
Setosa
Rule 2: (PW
≤ 1.44), (SW ≤ 3.06), (SL ≤ 5.08), (PL > 3.40) ->
Versicolor
Rule 3: (PW
≤ 1.44), (SW ≤ 3.06), (SL > 5.08), (PL ≤ 5.86) ->
Versicolor
Rule 4: (PW
≤ 1.44), (SW ≤ 3.06), (SL > 5.08), (PL > 5.86) ->
Virginica
Rule 5: (PW ≤ 1.44), (SW > 3.06), (SL ≤ 6.06) ->
Setosa
Rule 7: (PW
≤ 1.44), (SW > 3.06), (SL > 6.06), (SW ≤ 4.09) ->
Versicolor
Rule 8: (PW
≤ 1.44), (SW > 4.09), (SL > 6.06), … ->
Setosa
Rule 9: (PW
> 1.44), (PL ≤ 4.08), (SL ≤ 6.25), (SW ≤ 3.09) ->
Virginica
Rule 10: (PW
> 1.44), (PL ≤ 4.08), (SL ≤ 6.25), (SW > 3.09) -> Setosa
Rule 11: (PW
> 1.44), (PL ≤ 4.08), (SL > 6.25), (PW ≤ 2.40) ->
Versicolor
Rule 12:
… , (PL
≤ 4.08), (SL > 6.25), (PW > 2.40) -> Virginica
Rule 13: (PW > 1.44), (PL > 4.08), (SW ≤ 3.77) … -> Virginica
Rule 15: (PW
> 1.44), (PL > 4.08), (SW > 3.77), (PW ≤ 2.04) -> Setosa
Rule 16: …,
(PL > 4.08), (SW > 3.77), (PW > 2.04) -> Virginica
MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM
sepal length (f0): min = 4.3, max = 7.9
sepal width (f1): min = 2.0, max = 4.4
petal length (f2): min = 1.0, max = 6.9
petal width (f3): min = 0.1, max = 2.5
class: 0 (Decision tree); Class_0 (SWRL rules); setosa
(name of class in the original dataset);
class: 1 (Decision tree); Class_1 (SWRL rules);
versicolor (name of class in the original dataset);
class: 2 (Decision tree); Class_2 (SWRL rules);
virginica (name of class in the original dataset);
SET OF GENERATED RULES:
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), lessThanOrEqual(?x1,
5.08), lessThanOrEqual(?x2, 3.06), lessThanOrEqual(?x3, 3.40), lessThanOrEqual(?x4,
1.44) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x3,
3.40), lessThanOrEqual(?x1, 5.08), lessThanOrEqual(?x2, 3.06), lessThanOrEqual(?x4,
1.44) -> Versicolor(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x1,
5.08), lessThanOrEqual(?x2, 3.06), lessThanOrEqual(?x3, 5.86), lessThanOrEqual(?x4,
1.44) -> Versicolor(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x1,
5.08), greaterThan(?x3, 5.86), lessThanOrEqual(?x2, 3.06), lessThanOrEqual(?x4,
1.44) -> Virginica(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x2,
3.06), lessThanOrEqual(?x1, 6.06), lessThanOrEqual(?x3, 5.88), lessThanOrEqual(?x4,
1.44) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x2,
3.06), greaterThan(?x3, 5.88), lessThanOrEqual(?x1, 6.06), lessThanOrEqual(?x4,
1.44) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_width(?p, ?x4), greaterThan(?x1, 6.06), greaterThan(?x2,
3.06), lessThanOrEqual(?x2, 4.09), lessThanOrEqual(?x4, 1.44) -> Versicolor(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_width(?p, ?x4), greaterThan(?x1, 6.06), greaterThan(?x2,
4.09), lessThanOrEqual(?x4, 1.44) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x4,
1.44), lessThanOrEqual(?x1, 6.25), lessThanOrEqual(?x2, 3.09), lessThanOrEqual(?x3,
4.08) -> Virginica(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), sepal_width(?p, ?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x2,
3.09), greaterThan(?x4, 1.44), lessThanOrEqual(?x1, 6.25), lessThanOrEqual(?x3,
4.08) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x1, 6.25),
greaterThan(?x4, 1.44), lessThanOrEqual(?x3, 4.08), lessThanOrEqual(?x4, 2.40)
-> Versicolor(?p)
Unclassified_Iris(?p), sepal_length
(?p, ?x1), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x1, 6.25),
greaterThan(?x4, 2.40), lessThanOrEqual(?x3, 4.08) -> Virginica(?p)
Unclassified_Iris(?p), sepal_width(?p,
?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x3, 4.08), greaterThan(?x4,
1.44), lessThanOrEqual(?x2, 3.19) -> Virginica(?p)
Unclassified_Iris(?p), sepal_width(?p,
?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x2, 3.19), greaterThan(?x3,
4.08), greaterThan(?x4, 1.44), lessThanOrEqual(?x2, 3.77) -> Virginica(?p)
Unclassified_Iris(?p), sepal_width(?p,
?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x2, 3.77), greaterThan(?x3,
4.08), greaterThan(?x4, 1.44), lessThanOrEqual(?x4, 2.04) -> Setosa(?p)
Unclassified_Iris(?p), sepal_width(?p,
?x2), petal_length(?p, ?x3), petal_width(?p, ?x4), greaterThan(?x2, 3.77), greaterThan(?x3,
4.08), greaterThan(?x4, 2.04) -> Virginica(?p)
FINAL OPTIMIZED SET OF RULES:
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), lessThanOrEqual(?SL, 5.08), lessThanOrEqual(?SW, 3.06),
lessThanOrEqual(?PL, 3.40), lessThanOrEqual(?PW, 1.44) -> Setosa(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?PL, 3.40), lessThanOrEqual(?SL, 5.08),
lessThanOrEqual(?SW, 3.06), lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?SL, 5.08), lessThanOrEqual(?SW, 3.06),
lessThanOrEqual(?PL, 5.86), lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?SL, 5.08), greaterThan(?PL, 5.86),
lessThanOrEqual(?SW, 3.06), lessThanOrEqual(?PW, 1.44) -> Virginica(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_width(?p, ?PW),
greaterThan(?SW, 3.06), lessThanOrEqual(?SL, 6.06), lessThanOrEqual(?PW, 1.44)
-> Setosa(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_width(?p, ?PW),
greaterThan(?SL, 6.06), greaterThan(?SW, 3.06), lessThanOrEqual(?SW, 4.09),
lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_width(?p, ?PW),
greaterThan(?SL, 6.06), greaterThan(?SW, 4.09), lessThanOrEqual(?PW, 1.44)
-> Setosa(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?PW, 1.44), lessThanOrEqual(?SL, 6.25),
lessThanOrEqual(?SW, 3.09), lessThanOrEqual(?PL, 4.08) -> Virginica(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), sepal_width(?p, ?SW), petal_length(?p, ?PL),
petal_width(?p, ?PW), greaterThan(?SW, 3.09), greaterThan(?PW, 1.44),
lessThanOrEqual(?SL, 6.25), lessThanOrEqual(?PL, 4.08) -> Setosa(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?SL, 6.25), greaterThan(?PW, 1.44), lessThanOrEqual(?PL, 4.08),
lessThanOrEqual(?PW, 2.40) -> Versicolor(?p)
Unclassified_Iris(?p),
sepal_length (?p, ?SL), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?SL, 6.25), greaterThan(?PW, 2.40), lessThanOrEqual(?PL, 4.08)
-> Virginica(?p)
Unclassified_Iris(?p),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?PL,
4.08), greaterThan(?PW, 1.44), lessThanOrEqual(?SW, 3.77) -> Virginica(?p)
Unclassified_Iris(?p),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SW,
3.77), greaterThan(?PL, 4.08), greaterThan(?PW, 1.44), lessThanOrEqual(?PW,
2.04) -> Setosa(?p)
Unclassified_Iris(?p),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SW,
3.77), greaterThan(?PL, 4.08), greaterThan(?PW, 2.04) -> Virginica(?p)
Resulting
ontology is available in:
http://ai.it.jyu.fi/vagan/ontologies/2024/SWRL_4.owl
Expanded Rule-Based Classifier & Visualization
This script includes all rules, applies
them to a grid of values for each attribute pair, and generates multiple
decision boundary plots.
Key enhancements:
·
Expanded
rule set: Includes all classification rules.
·
Multiple
decision boundary visualizations:
o (Petal Length, Petal Width)
o (Sepal Length, Sepal Width)
o (Sepal Length, Petal Length)
o (Sepal Width, Petal Width)
·
Overlays
real Iris dataset points: Ensures rule consistency with actual data.
________________________________________
How it works:
1. Creates a mesh grid of values for
each feature pair.
2. Classifies each grid point using the
expanded rule set.
3. Maps classification to colors
(Setosa=0, Versicolor=1, Virginica=2).
4. Plots decision boundaries using imshow().
5. Overlays actual data points to
compare with real Iris flower classifications.
import numpy as np
import matplotlib.pyplot
as plt
from sklearn.datasets
import load_iris
# Load Iris dataset for validation
iris = load_iris()
X = iris.data # Sepal & Petal
measurements
y = iris.target # Class labels: 0=Setosa, 1=Versicolor, 2=Virginica
species = iris.target_names
# Rule-based classifier function (expanded)
def classify_iris(SL, SW, PL, PW):
    """Classify one flower with the final optimized SWRL rule set (tree depth 4).

    Each ``if`` transcribes exactly one rule of the final rule set. The rules
    come from the leaves of a decision tree, so they are mutually exclusive
    and their order does not affect the result.

    Args:
        SL: Sepal length in cm.
        SW: Sepal width in cm.
        PL: Petal length in cm.
        PW: Petal width in cm.

    Returns:
        ``"Setosa"``, ``"Versicolor"`` or ``"Virginica"``; ``"Unknown"`` only
        when no rule matches (e.g. a NaN input).
    """
    # Setosa classification
    # The SL/SW conditions are part of the published rule; without them this
    # check would shadow the Versicolor rule for SL > 5.08.
    if SL <= 5.08 and SW <= 3.06 and PL <= 3.40 and PW <= 1.44:
        return "Setosa"
    if SW > 3.06 and SL <= 6.06 and PW <= 1.44:
        return "Setosa"
    if SW > 4.09 and SL > 6.06 and PW <= 1.44:
        return "Setosa"
    if SW > 3.09 and PW > 1.44 and SL <= 6.25 and PL <= 4.08:
        return "Setosa"
    if SW > 3.77 and PL > 4.08 and PW > 1.44 and PW <= 2.04:
        return "Setosa"

    # Versicolor classification
    if PL > 3.40 and PW <= 1.44 and SL <= 5.08 and SW <= 3.06:
        return "Versicolor"
    if SL > 5.08 and SW <= 3.06 and PL <= 5.86 and PW <= 1.44:
        return "Versicolor"
    if SL > 6.06 and SW > 3.06 and SW <= 4.09 and PW <= 1.44:
        return "Versicolor"
    if SL > 6.25 and PW > 1.44 and PL <= 4.08 and PW <= 2.40:
        return "Versicolor"

    # Virginica classification
    if SL > 5.08 and PL > 5.86 and SW <= 3.06 and PW <= 1.44:
        return "Virginica"
    if PW > 1.44 and SL <= 6.25 and SW <= 3.09 and PL <= 4.08:
        return "Virginica"
    if SL > 6.25 and PW > 2.40 and PL <= 4.08:
        return "Virginica"
    if SW <= 3.77 and PL > 4.08 and PW > 1.44:
        return "Virginica"
    if SW > 3.77 and PL > 4.08 and PW > 2.04:
        return "Virginica"

    return "Unknown"  # Shouldn't happen if rules are correct
# Define function to plot decision boundaries for
different feature pairs
def plot_decision_boundary(feature1_idx, feature2_idx, feature1_name, feature2_name):
    """Plot the rule-based class regions for one pair of Iris features.

    A 100 x 100 grid spanning the observed range of both features (padded by
    0.5 on each side) is classified with ``classify_iris``; the two features
    that are not plotted are held at 0. The real samples are overlaid.

    Args:
        feature1_idx: Column of ``X`` shown on the x axis.
        feature2_idx: Column of ``X`` shown on the y axis.
        feature1_name: Label for the x axis.
        feature2_name: Label for the y axis.
    """
    feature1_range = np.linspace(X[:, feature1_idx].min() - 0.5, X[:, feature1_idx].max() + 0.5, 100)
    feature2_range = np.linspace(X[:, feature2_idx].min() - 0.5, X[:, feature2_idx].max() + 0.5, 100)

    # Map species to numerical values
    species_to_color = {"Setosa": 0, "Versicolor": 1, "Virginica": 2, "Unknown": 3}

    # Classify each point in the grid. Every grid value is written into its
    # own column position, so any pair of indices (e.g. (0, 3) or (1, 2)) ends
    # up in the matching classify_iris argument instead of a neighbouring one.
    # NOTE(review): holding the other two features at 0 shows a slice far from
    # the data; confirm this is the intended cross-section.
    sample = [0.0] * X.shape[1]
    color_grid = np.empty((feature2_range.size, feature1_range.size), dtype=int)
    for row, f2 in enumerate(feature2_range):
        sample[feature2_idx] = f2
        for col, f1 in enumerate(feature1_range):
            sample[feature1_idx] = f1
            color_grid[row, col] = species_to_color[classify_iris(*sample)]

    # Plot decision boundaries. vmin/vmax pin the class -> colour mapping so it
    # is the same in every figure, whichever classes happen to appear.
    plt.figure(figsize=(8, 6))
    region_image = plt.imshow(
        color_grid,
        extent=[feature1_range.min(), feature1_range.max(),
                feature2_range.min(), feature2_range.max()],
        origin="lower", alpha=0.5, cmap='viridis', vmin=0, vmax=3)

    # Scatter plot of actual Iris data points
    for i, spec in enumerate(species):
        plt.scatter(X[y == i, feature1_idx], X[y == i, feature2_idx], label=spec, edgecolor='k')

    # Labels and legend
    plt.xlabel(feature1_name)
    plt.ylabel(feature2_name)
    plt.title(f"Decision Boundaries ({feature1_name} vs {feature2_name})")
    plt.legend()
    # Pass the image explicitly: after the scatter calls the "current" mappable
    # is the last scatter collection, not the region image.
    region_colorbar = plt.colorbar(region_image, label="Species", ticks=[0, 1, 2, 3])
    region_colorbar.ax.set_yticklabels(["Setosa", "Versicolor", "Virginica", "Unknown"])
    plt.show()
# Draw one decision-boundary figure per feature pair of interest.
feature_pairs = (
    (2, 3, "Petal Length", "Petal Width"),
    (0, 1, "Sepal Length", "Sepal Width"),
    (0, 2, "Sepal Length", "Petal Length"),
    (1, 3, "Sepal Width", "Petal Width"),
)
for first_idx, second_idx, first_label, second_label in feature_pairs:
    plot_decision_boundary(first_idx, second_idx, first_label, second_label)
To project the Iris samples onto two main components while respecting
the rule-based classifier, we can:
1. Use Principal Component Analysis (PCA): Reduce the
4D features (sepal_length, sepal_width, petal_length, petal_width) to 2D for
visualization.
2. Apply the Rule-Based Classifier: Classify the
PCA-projected points without using a machine learning model,
but purely based on the provided rules.
3. Visualize Decision Boundaries: Plot rule-based
decision regions in PCA space alongside actual Iris dataset projections.
import numpy as np
import matplotlib.pyplot
as plt
from sklearn.decomposition
import PCA
from sklearn.datasets
import load_iris
# Load the Iris data.
iris = load_iris()
# X columns: Sepal Length, Sepal Width, Petal Length, Petal Width.
# y labels: 0=Setosa, 1=Versicolor, 2=Virginica.
X, y = iris.data, iris.target
species = iris.target_names

# Project the four measurements onto the first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# Rule-based classifier function (unchanged)
def classify_iris(SL, SW, PL, PW):
    """Apply the rule-based Iris classifier to one measurement vector.

    Args:
        SL: Sepal length.
        SW: Sepal width.
        PL: Petal length.
        PW: Petal width.

    Returns:
        "Setosa", "Versicolor" or "Virginica" for the first rule group that
        fires (groups are tried in that order), otherwise "Unknown".
    """
    # Setosa rules, combined in their original order (short-circuiting).
    if (
        (PL <= 3.40 and PW <= 1.44)
        or (SW > 3.06 and SL <= 6.06 and PW <= 1.44)
        or (SW > 4.09 and SL > 6.06 and PW <= 1.44)
        or (SW > 3.09 and PW > 1.44 and SL <= 6.25 and PL <= 4.08)
        or (SW > 3.77 and PL > 4.08 and PW > 1.44 and PW <= 2.04)
    ):
        return "Setosa"

    # Versicolor rules.
    if (
        (PL > 3.40 and PW <= 1.44 and SL <= 5.08 and SW <= 3.06)
        or (SL > 5.08 and SW <= 3.06 and PL <= 5.86 and PW <= 1.44)
        or (SL > 6.06 and SW > 3.06 and SW <= 4.09 and PW <= 1.44)
        or (SL > 6.25 and PW > 1.44 and PL <= 4.08 and PW <= 2.40)
    ):
        return "Versicolor"

    # Virginica rules.
    if (
        (SL > 5.08 and PL > 5.86 and SW <= 3.06 and PW <= 1.44)
        or (PW > 1.44 and SL <= 6.25 and SW <= 3.09 and PL <= 4.08)
        or (SL > 6.25 and PW > 2.40 and PL <= 4.08)
        or (SW <= 3.77 and PL > 4.08 and PW > 1.44)
        or (SW > 3.77 and PL > 4.08 and PW > 2.04)
    ):
        return "Virginica"

    return "Unknown"
# Generate a grid in PCA space (data range padded by 1 on every side)
x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200), np.linspace(y_min, y_max, 200))

# Inverse transform grid points back to original feature space so the rules,
# which are written on the four raw measurements, can be applied to them
grid_points = np.c_[xx.ravel(), yy.ravel()]
grid_points_original = pca.inverse_transform(grid_points)

# Classify grid points using rule-based function
grid_classes = np.array([classify_iris(*point) for point in grid_points_original])
species_to_color = {"Setosa": 0, "Versicolor": 1, "Virginica": 2, "Unknown": 3}
grid_colors = np.vectorize(species_to_color.get)(grid_classes).reshape(xx.shape)

# Plot decision boundaries in PCA space. The class codes are discrete, so the
# contour levels are placed halfway between codes: each filled band is then
# exactly one class instead of an automatically chosen value range.
plt.figure(figsize=(8, 6))
class_levels = [-0.5, 0.5, 1.5, 2.5, 3.5]
region_plot = plt.contourf(xx, yy, grid_colors, levels=class_levels, alpha=0.3, cmap="viridis")

# Scatter plot actual PCA-transformed data
for i, spec in enumerate(species):
    plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], label=spec, edgecolor='k')

# Labels and legend
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.title("Rule-Based Decision Boundaries in PCA Space")
plt.legend()
# Pass the contour set explicitly: after the scatter calls the "current"
# mappable is the last scatter collection, not the decision regions.
region_colorbar = plt.colorbar(region_plot, label="Species", ticks=[0, 1, 2, 3])
region_colorbar.ax.set_yticklabels(["Setosa", "Versicolor", "Virginica", "Unknown"])
plt.show()
SWRL-to-SPARQL translation of
the rules
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW), lessThanOrEqual(?SL,
5.08), lessThanOrEqual(?SW, 3.06), lessThanOrEqual(?PL, 3.40),
lessThanOrEqual(?PW, 1.44) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?PL, 3.40), lessThanOrEqual(?SL, 5.08), lessThanOrEqual(?SW, 3.06),
lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?SL, 5.08), lessThanOrEqual(?SW, 3.06), lessThanOrEqual(?PL, 5.86),
lessThanOrEqual(?PW, 1.44) -> Versicolor(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?SL, 5.08), greaterThan(?PL, 5.86), lessThanOrEqual(?SW, 3.06),
lessThanOrEqual(?PW, 1.44) -> Virginica(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_width(?p, ?PW), greaterThan(?SW, 3.06),
lessThanOrEqual(?SL, 6.06), lessThanOrEqual(?PW, 1.44) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_width(?p, ?PW), greaterThan(?SL, 6.06),
greaterThan(?SW, 3.06), lessThanOrEqual(?SW, 4.09), lessThanOrEqual(?PW, 1.44)
-> Versicolor(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_width(?p, ?PW), greaterThan(?SL, 6.06),
greaterThan(?SW, 4.09), lessThanOrEqual(?PW, 1.44) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?PW, 1.44), lessThanOrEqual(?SL, 6.25), lessThanOrEqual(?SW, 3.09),
lessThanOrEqual(?PL, 4.08) -> Virginica(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
sepal_width(?p, ?SW), petal_length(?p, ?PL), petal_width(?p, ?PW),
greaterThan(?SW, 3.09), greaterThan(?PW, 1.44), lessThanOrEqual(?SL, 6.25),
lessThanOrEqual(?PL, 4.08) -> Setosa(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SL, 6.25),
greaterThan(?PW, 1.44), lessThanOrEqual(?PL, 4.08), lessThanOrEqual(?PW, 2.40)
-> Versicolor(?p)
Unclassified_Iris(?p), sepal_length (?p, ?SL),
petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SL, 6.25),
greaterThan(?PW, 2.40), lessThanOrEqual(?PL, 4.08) -> Virginica(?p)
Unclassified_Iris(?p), sepal_width(?p, ?SW),
petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?PL, 4.08),
greaterThan(?PW, 1.44), lessThanOrEqual(?SW, 3.77) -> Virginica(?p)
Unclassified_Iris(?p), sepal_width(?p, ?SW),
petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SW, 3.77),
greaterThan(?PL, 4.08), greaterThan(?PW, 1.44), lessThanOrEqual(?PW, 2.04)
-> Setosa(?p)
Unclassified_Iris(?p), sepal_width(?p, ?SW),
petal_length(?p, ?PL), petal_width(?p, ?PW), greaterThan(?SW, 3.77),
greaterThan(?PL, 4.08), greaterThan(?PW, 2.04) -> Virginica(?p)
oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX : <http://ai.it.jyu.fi/vagan/ontologies/2024/SWRL_4.owl#>

# Classifies every :Unclassified_Iris individual by testing the 14 rules in
# order inside one nested IF expression.
# The final "else" branch is the never-bound variable ?noMatch: evaluating it
# raises an expression error, so BIND leaves ?class unbound and the FILTER
# drops the row. (UNDEF is only legal inside VALUES, not as an expression.)
CONSTRUCT {
  ?p a ?class .
}
WHERE {
  ?p rdf:type :Unclassified_Iris ;
     :sepal_length ?SL ;
     :sepal_width ?SW ;
     :petal_length ?PL ;
     :petal_width ?PW .
  BIND (
    IF(?SL <= 5.08 && ?SW <= 3.06 && ?PL <= 3.40 && ?PW <= 1.44, :Setosa,
    IF(?PL > 3.40 && ?SL <= 5.08 && ?SW <= 3.06 && ?PW <= 1.44, :Versicolor,
    IF(?SL > 5.08 && ?SW <= 3.06 && ?PL <= 5.86 && ?PW <= 1.44, :Versicolor,
    IF(?SL > 5.08 && ?PL > 5.86 && ?SW <= 3.06 && ?PW <= 1.44, :Virginica,
    IF(?SW > 3.06 && ?SL <= 6.06 && ?PW <= 1.44, :Setosa,
    IF(?SL > 6.06 && ?SW > 3.06 && ?SW <= 4.09 && ?PW <= 1.44, :Versicolor,
    IF(?SL > 6.06 && ?SW > 4.09 && ?PW <= 1.44, :Setosa,
    IF(?PW > 1.44 && ?SL <= 6.25 && ?SW <= 3.09 && ?PL <= 4.08, :Virginica,
    IF(?SW > 3.09 && ?PW > 1.44 && ?SL <= 6.25 && ?PL <= 4.08, :Setosa,
    IF(?SL > 6.25 && ?PW > 1.44 && ?PL <= 4.08 && ?PW <= 2.40, :Versicolor,
    IF(?SL > 6.25 && ?PW > 2.40 && ?PL <= 4.08, :Virginica,
    IF(?PL > 4.08 && ?PW > 1.44 && ?SW <= 3.77, :Virginica,
    IF(?SW > 3.77 && ?PL > 4.08 && ?PW > 1.44 && ?PW <= 2.04, :Setosa,
    IF(?SW > 3.77 && ?PL > 4.08 && ?PW > 2.04, :Virginica,
       ?noMatch)))))))))))))) AS ?class)
  FILTER (BOUND(?class))
}
oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
PREFIX rdf:
<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX : <http://ai.it.jyu.fi/vagan/ontologies/2024/SWRL_4.owl#>
CONSTRUCT {
?p rdf:type ?class .
}
WHERE {
?p rdf:type :Unclassified_Iris ;
:sepal_length ?SL ;
:sepal_width ?SW ;
:petal_length ?PL ;
:petal_width ?PW .
BIND
(
IF(?SL <= 5.08 && ?SW <= 3.06 && ?PL
<= 3.40 && ?PW <= 1.44, :Setosa,
IF(?PL > 3.40 && ?SL <= 5.08 && ?SW
<= 3.06 && ?PW <= 1.44, :Versicolor,
IF(?SL > 5.08 && ?SW <= 3.06 && ?PL
<= 5.86 && ?PW <= 1.44, :Versicolor,
IF(?SL > 5.08 && ?PL > 5.86 && ?SW
<= 3.06 && ?PW <= 1.44, :Virginica,
IF(?SW > 3.06 && ?SL <= 6.06 && ?PW
<= 1.44, :Setosa,
IF(?SL > 6.06 && ?SW > 3.06 && ?SW
<= 4.09 && ?PW <= 1.44, :Versicolor,
IF(?SL > 6.06 && ?SW > 4.09 && ?PW
<= 1.44, :Setosa,
IF(?PW > 1.44 && ?SL <= 6.25 && ?SW
<= 3.09 && ?PL <= 4.08, :Virginica,
IF(?SW > 3.09 && ?PW > 1.44 && ?SL
<= 6.25 && ?PL <= 4.08, :Setosa,
IF(?SL > 6.25 && ?PW > 1.44 && ?PL
<= 4.08 && ?PW <= 2.40, :Versicolor,
IF(?SL > 6.25 && ?PW > 2.40 && ?PL
<= 4.08, :Virginica,
IF(?PL > 4.08 && ?PW > 1.44 && ?SW
<= 3.77, :Virginica,
IF(?SW > 3.77 && ?PL > 4.08 && ?PW
> 1.44 && ?PW <= 2.04, :Setosa,
IF(?SW > 3.77 && ?PL > 4.08 && ?PW
> 2.04, :Virginica, ""))))))))))))))) AS ?class)
FILTER (?class != "")
}
ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX : <http://ai.it.jyu.fi/vagan/ontologies/2024/SWRL_4.owl#>

# Table-driven form of the 14 SWRL rules: one VALUES row per rule.
# For each feature, *_gt is an exclusive lower bound (swrlb:greaterThan) and
# *_le an inclusive upper bound (swrlb:lessThanOrEqual). UNDEF means the rule
# does not constrain that side, so the thresholds are exactly those of the
# SWRL rules (no +0.01 offsets or 0/100 sentinels that would leave gaps).
CONSTRUCT {
  ?p rdf:type ?class .
}
WHERE {
  ?p rdf:type :Unclassified_Iris ;
     :sepal_length ?SL ;
     :sepal_width ?SW ;
     :petal_length ?PL ;
     :petal_width ?PW .
  VALUES (?SL_gt ?SL_le ?SW_gt ?SW_le ?PL_gt ?PL_le ?PW_gt ?PW_le ?class) {
    (UNDEF 5.08  UNDEF 3.06  UNDEF 3.40  UNDEF 1.44  :Setosa)
    (UNDEF 5.08  UNDEF 3.06  3.40  UNDEF UNDEF 1.44  :Versicolor)
    (5.08  UNDEF UNDEF 3.06  UNDEF 5.86  UNDEF 1.44  :Versicolor)
    (5.08  UNDEF UNDEF 3.06  5.86  UNDEF UNDEF 1.44  :Virginica)
    (UNDEF 6.06  3.06  UNDEF UNDEF UNDEF UNDEF 1.44  :Setosa)
    (6.06  UNDEF 3.06  4.09  UNDEF UNDEF UNDEF 1.44  :Versicolor)
    (6.06  UNDEF 4.09  UNDEF UNDEF UNDEF UNDEF 1.44  :Setosa)
    (UNDEF 6.25  UNDEF 3.09  UNDEF 4.08  1.44  UNDEF :Virginica)
    (UNDEF 6.25  3.09  UNDEF UNDEF 4.08  1.44  UNDEF :Setosa)
    (6.25  UNDEF UNDEF UNDEF UNDEF 4.08  1.44  2.40  :Versicolor)
    (6.25  UNDEF UNDEF UNDEF UNDEF 4.08  2.40  UNDEF :Virginica)
    (UNDEF UNDEF UNDEF 3.77  4.08  UNDEF 1.44  UNDEF :Virginica)
    (UNDEF UNDEF 3.77  UNDEF 4.08  UNDEF 1.44  2.04  :Setosa)
    (UNDEF UNDEF 3.77  UNDEF 4.08  UNDEF 2.04  UNDEF :Virginica)
  }
  # A bound that is UNDEF in the matched row is unbound here and is skipped.
  FILTER(
    (!BOUND(?SL_gt) || ?SL >  ?SL_gt) && (!BOUND(?SL_le) || ?SL <= ?SL_le) &&
    (!BOUND(?SW_gt) || ?SW >  ?SW_gt) && (!BOUND(?SW_le) || ?SW <= ?SW_le) &&
    (!BOUND(?PL_gt) || ?PL >  ?PL_gt) && (!BOUND(?PL_le) || ?PL <= ?PL_le) &&
    (!BOUND(?PW_gt) || ?PW >  ?PW_gt) && (!BOUND(?PW_le) || ?PW <= ?PW_le)
  )
}
ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
PREFIX rdf:
<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX :
<http://ai.it.jyu.fi/vagan/ontologies/2024/SWRL_4.owl#>
CONSTRUCT {
?p a :Setosa .
}
WHERE {
?p rdf:type :Unclassified_Iris ;
:sepal_length ?SL ;
:sepal_width ?SW ;
:petal_length ?PL ;
:petal_width ?PW .
FILTER (?SL <= 5.08 && ?SW <=
3.06 && ?PL <= 3.40 && ?PW <= 1.44)
}
ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
Running the SPARQL CONSTRUCT query, saving the results
as an RDF file, and then importing that file into Protégé (or any OWL ontology
editor) would effectively update the ontology with the inferred facts, similar to what SWRL rules would do within a reasoner.
However, there are some nuances to consider:
Running the SPARQL CONSTRUCT query: The query generates new RDF triples that classify unclassified iris
instances (Unclassified_Iris) as Setosa based on attribute conditions. These
inferred triples (e.g., <individual> rdf:type :Setosa .) are returned as a result graph;
CONSTRUCT by itself does not write them into the RDF store, which is why the results are saved and imported in the following steps.
Saving the results: The output of
the CONSTRUCT query can be stored as an RDF file (e.g., inferred_results.ttl in
Turtle format).
Importing into Protégé: Open Protégé and
import the RDF file containing the inferred triples. Since Protégé operates on
an OWL ontology, it will treat these new triples as explicit assertions,
meaning the classification results from SPARQL inference are now part of the
ontology.
Key difference from SWRL:
· SWRL rules do not modify the ontology directly, but rather produce inferred results dynamically using a reasoner.
· SPARQL CONSTRUCT adds inferred facts as explicit assertions, effectively modifying the ontology persistently after saving and reloading.
· If you later update the dataset and rerun the query, you may need to manually remove outdated inferences (whereas SWRL reasoners automatically recompute results).
Alternative--Using SPARQL in a Triplestore: Instead of manually importing results into Protégé, you could use a
triplestore like Apache Jena Fuseki or GraphDB that supports SPARQL CONSTRUCT
inference in real time. Some triplestores allow defining SPARQL rules (similar to SWRL) that dynamically apply without needing to
modify the ontology manually.
Therefore, SPARQL can serve as an alternative to SWRL
for inference, and you can update the ontology by importing the results into
Protégé. However, unlike SWRL reasoners, which work dynamically, SPARQL
requires re-running the queries and updating the ontology manually if new data
arrives.
USE OF CHATGPT AS EXTERNAL SERVICE
Use of ChatGPT as external service: Our system relies on ChatGPT as an external service for four key tasks,
all of which are fully automated through API calls without requiring
human-in-the-middle interactions. While it would have been possible to embed
these functionalities directly into our system, we chose to outsource them to
ChatGPT due to its flexibility, natural language processing capabilities, and
ability to generate structured transformations on demand. First, ChatGPT
performs feature mapping for ontology alignment, automatically converting
internal artificial variable representations (such as f0, f1, Class_1, etc.)
into meaningful domain-specific names (such as petal_length, Setosa, etc.),
ensuring that generated SWRL rules remain interpretable. Second, it generates
natural language explanations for SWRL rules, translating formal logical
expressions into human-readable descriptions that provide intuitive insights
into the classification logic. Third, it explains classification outcomes
derived from Pellet reasoning in Protégé by analyzing the attributes of test
samples and justifying why a particular class was assigned based on learned
rules. Finally, ChatGPT assists in transforming SWRL rules into SPARQL queries,
allowing for an alternative, query-based representation of logical rules,
useful for systems that do not support SWRL reasoning directly. By integrating
ChatGPT through an automated API-driven workflow, our system remains
lightweight while leveraging advanced NLP and reasoning capabilities without
requiring additional in-house implementations for rule translation, explanation
generation, and variable mapping. This approach enhances interpretability and
interoperability while reducing system complexity.
1. Feature mapping for ontology alignment via ChatGPT
In our framework, ChatGPT serves as an external
service for enhancing the interpretability of extracted rules. Specifically,
after the NN-Decision_Tree-SWRL pipeline generates SWRL rules, they initially
contain artificial variable names (e.g., f0, f1, Class_1, Class_2). To align
these rules with the ontology, we employ feature mapping for ontology alignment
using ChatGPT. The algorithm constructs a prompt containing the generated SWRL
rules with artificial variables and a mapping schema that defines which artificial
variable corresponds to which original feature or class (e.g., f0 →
petal_length, Class_1 → Setosa). ChatGPT then processes this input and
returns a transformed version of the SWRL rules, where all artificial variables
are replaced with their corresponding real-world feature names. This ensures
that the extracted rules remain meaningful, interpretable, and fully integrated
with the ontology. By leveraging ChatGPT for this semantic transformation, our
approach improves the usability of rule-based models in knowledge
representation systems.
2. Generating natural language explanations for SWRL rules via ChatGPT
In our framework, ChatGPT is used as an external
service to automatically generate natural language (NL) explanations for SWRL
rules. This enhances the interpretability of rule-based models by translating
logical expressions into human-readable descriptions.
After the NN-Decision_Tree-SWRL pipeline extracts
rules in SWRL format, the system calls ChatGPT via API to generate textual
explanations. The pipeline constructs a structured prompt containing: the SWRL
rule in its logical form; a request to rephrase it in an intuitive NL format;
guidance on structuring the response, including a rule summary and an
interpretation.
Below is an example of how the procedure sends a
request to ChatGPT via API:
import os

import openai

api_key = "your_api_key_here"
# The legacy (pre-1.0) openai client reads its credential from the module-level
# attribute; without this assignment the key defined above is never used.
# A key exported as OPENAI_API_KEY takes precedence over the placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", api_key)


def generate_nl_explanation(rule_text):
    """Ask ChatGPT for a natural-language explanation of one SWRL rule.

    Args:
        rule_text: The SWRL rule, in its textual form, to be explained.

    Returns:
        The text content of the first choice in the chat completion response.
    """
    prompt = f"""Convert
the following SWRL rule into a natural language explanation:
Rule: {rule_text}
Your response should:
1. Present the rule in a structured format with bullet
points.
2. Summarize the conditions and classification result.
3. Provide an intuitive interpretation in simple language.
Example Output Format:
- **Rule X:** If a flower has:
• Condition 1
• Condition 2
Then it belongs to Class Y.
- **Interpretation:** Explanation in
layman’s terms.
"""
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.5
    )
    return response["choices"][0]["message"]["content"]


# Example SWRL rule input
swrl_rule = """Unclassified_Iris(?p), sepal_width(?p, ?x2), petal_length(?p, ?x3),
petal_width(?p, ?x4), greaterThan(?x3, 4.00),
greaterThan(?x4, 1.70),
lessThanOrEqual(?x2, 3.80) ->
Virginica(?p)"""

# Generate explanation
nl_explanation = generate_nl_explanation(swrl_rule)
print(nl_explanation)
Example Output from ChatGPT
Rule 7:
If a flower has:
• Sepal width ≤ 3.80 cm
• Petal length > 4.00 cm
• Petal width > 1.70 cm
Then it is classified as Virginica.
Interpretation: If the petal length is relatively long (> 4.00 cm) and the petal width is also large (> 1.70 cm), but the
sepal width is not too wide (≤ 3.80 cm), then the
flower is classified as Virginica.
Rule 7:
If a flower has:
• Sepal width ≤ 3.80 cm
• Petal length > 4.00 cm
• Petal width > 1.70 cm
… then it is classified as Virginica.
Interpretation: If the petal length is relatively long (> 4.00 cm) and the petal width is also large
(> 1.70 cm), but the sepal width is not too wide (≤ 3.80 cm), then the
flower is classified as Virginica.
This automated NL explanation generation significantly
improves human interpretability, making it easier for non-experts to understand
and validate the extracted knowledge. By integrating ChatGPT as a semantic
translator, our pipeline bridges the gap between machine-generated rules and
human comprehension.
3. Generating Explanations for Classification Outcomes via ChatGPT
In our framework, ChatGPT is used to provide intuitive
explanations for classification results obtained by the Pellet reasoner in
Protégé. The goal is to help users understand why a specific test sample is
classified into a given category based on its attributes.
After a test sample is classified, the system
automatically constructs a structured prompt for ChatGPT, containing: the test
sample’s feature values (e.g., sepal length, petal width, etc.); the
classification outcome assigned by the reasoner; a request for ChatGPT to
explain the decision by referencing relevant classification rules.
Below is an example of how the pipeline calls ChatGPT
via API to generate explanations:
import os

import openai

api_key = "your_api_key_here"
# The legacy (pre-1.0) openai client reads its credential from the module-level
# attribute; without this assignment the key defined above is never used.
# A key exported as OPENAI_API_KEY takes precedence over the placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", api_key)


def explain_classification(sample_data, classification_result):
    """Ask ChatGPT why a test sample received its classification.

    Args:
        sample_data: Textual description of the sample's feature values.
        classification_result: The class assigned to the sample.

    Returns:
        The text content of the first choice in the chat completion response.
    """
    prompt = f"""Given
the following test sample and classification result, explain why this
classification was assigned:
Test Sample:
{sample_data}
Classification: {classification_result}
Your response should:
1. List the key feature values.
2. Compare these values against relevant classification
rules.
3. Explain how the classification follows from the rules.
4. Provide an intuitive interpretation.
Example Output Format:
- **Explanation:** Feature
comparisons and rule matching.
- **Interpretation:** Simple
explanation of why the classification makes sense.
"""
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.5
    )
    return response["choices"][0]["message"]["content"]


# Example test sample and classification result
test_sample = """{sepal
length (cm) = 6.5; sepal width (cm) = 3.5;
petal length (cm) = 5.0; petal width (cm) = 1.6}"""
classification = "Versicolor"

# Generate explanation
explanation = explain_classification(test_sample, classification)
print(explanation)
Example Output from ChatGPT
Explanation:
- The sepal length (6.5 cm) is > 6.14 cm.
- The sepal width (3.5 cm) is ≤ 3.89 cm.
- The petal width (1.6 cm) is ≤ 1.70 cm.
- These values match Rule 3, which classifies the flower as Versicolor.
Interpretation:
The flower has a slightly larger sepal length but remains within the range
where Versicolor is assigned, as petal width and sepal width still meet the necessary conditions.
4. Transforming SWRL rule sets into SPARQL queries via ChatGPT
Why and When is this
Transformation Needed? SWRL (Semantic Web Rule Language) is a powerful
rule-based reasoning framework used in ontology-driven applications. However,
in some cases, transforming SWRL rules into SPARQL queries may be beneficial:
· Interoperability: Not all ontology systems or triple stores support SWRL reasoning, whereas SPARQL is widely supported.
· Alternative querying mechanism: If a system does not support real-time rule-based reasoning, SWRL rules can be translated into SPARQL to allow explicit querying instead.
· Optimized execution: SWRL rules require an inference engine (e.g., Pellet), whereas SPARQL queries can retrieve results directly from an RDF store without full reasoning overhead.
· Integration with other systems: Some knowledge graph applications, such as enterprise semantic search, prefer SPARQL over SWRL for efficient data access.
We leverage ChatGPT as an external service to
automatically convert SWRL rules into equivalent SPARQL queries. The
transformation follows these steps:
· SWRL rule set: The input consists of SWRL rules that express logical conditions and implications.
· ChatGPT prompting: We construct a structured prompt that provides the SWRL rule and requests an equivalent SPARQL representation.
· Generated SPARQL query: ChatGPT returns a corresponding SPARQL SELECT or ASK query that retrieves instances satisfying the rule.
Below is an example of how our pipeline sends a
request to ChatGPT for transformation:
===========================================================================================
import os

import openai

api_key = "your_api_key_here"
# The legacy (pre-1.0) openai client reads its credential from the module-level
# attribute; without this assignment the key defined above is never used.
# A key exported as OPENAI_API_KEY takes precedence over the placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", api_key)


def swrl_to_sparql(swrl_rule):
    """Ask ChatGPT to translate one SWRL rule into a SPARQL query.

    Args:
        swrl_rule: The SWRL rule, in its textual form, to be translated.

    Returns:
        The text content of the first choice in the chat completion response.
        NOTE(review): the returned query is model output and is not validated
        here; check it before running it against a store.
    """
    prompt = f"""Convert
the following SWRL rule into an equivalent SPARQL query:
SWRL Rule:
{swrl_rule}
Guidelines:
- Convert class predicates into RDF triples.
- Use FILTER conditions for numeric comparisons.
- Ensure the query returns all matching instances.
Output the SPARQL query in proper syntax."""
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    return response["choices"][0]["message"]["content"]


# Example SWRL rule
swrl_rule = """
Unclassified_Iris(?p), sepal_width(?p, ?x2),
petal_length(?p, ?x3), petal_width(?p, ?x4),
greaterThan(?x3, 4.00),
greaterThan(?x4, 1.70), lessThanOrEqual(?x2, 3.80)-> Virginica(?p)
"""

# Convert to SPARQL
sparql_query = swrl_to_sparql(swrl_rule)
print(sparql_query)
===========================================================================================
Example output (generated SPARQL query)
=====================================================
SELECT ?p WHERE {
?p rdf:type :Unclassified_Iris .
?p :sepal_width ?x2 .
?p :petal_length ?x3 .
?p :petal_width ?x4 .
FILTER (?x3 > 4.00 && ?x4 > 1.70 && ?x2 <= 3.80)}
=====================================================
Such automated rule translation eliminates the need
for manual SWRL-to-SPARQL conversion, enables reasoning-like queries in systems
that do not support SWRL, and allows querying knowledge graphs dynamically
without full ontology reasoning.
----------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------
Experiments
with multiple datasets (draft, without rule optimization;
requires further verification and debugging)
EXPERIMENT with Deeper Decision Tree
(FORMER UNVERIFIED DRAFT! Without rule optimization):
import numpy as np
import pandas as pd
from sklearn.datasets
import load_digits, load_wine,
load_iris
from sklearn.model_selection
import train_test_split
from sklearn.preprocessing
import StandardScaler
from sklearn.neural_network
import MLPClassifier
from sklearn.tree
import DecisionTreeClassifier,
export_text
import torch
import re
import requests
from io import StringIO
def load_and_process_data(dataset_name):
    """Load a named dataset, build the naming maps and return a scaled split.

    Args:
        dataset_name: One of "digits", "wine", "iris" or "breast_cancer".
            The first three come from scikit-learn; "breast_cancer" is
            downloaded from the UCI repository.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test, feature_mapping,
        class_mapping, n_features, scaler)`` where
        * the split is 80/20 with ``random_state=42`` and both feature
          matrices are standardised with a scaler fitted on the training part;
        * ``feature_mapping`` maps ``"f<i>"`` to ``("hasF<i>", original name)``;
        * ``class_mapping`` maps ``"class: <i>"`` to ``("Class_<i>", original name)``;
        * ``scaler`` is the fitted ``StandardScaler``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
        requests.RequestException: If the "breast_cancer" download fails,
            times out or returns an HTTP error status.
    """
    if dataset_name == "digits":
        digits = load_digits()
        X = digits.data
        y = digits.target
        feature_names = [f"pixel_{i}" for i in range(X.shape[1])]
        class_names = digits.target_names
    elif dataset_name == "wine":
        wine = load_wine()
        X = wine.data
        y = wine.target
        feature_names = wine.feature_names
        class_names = wine.target_names
    elif dataset_name == "iris":
        iris = load_iris()
        X = iris.data
        y = iris.target
        feature_names = iris.feature_names
        class_names = iris.target_names
    elif dataset_name == "breast_cancer":
        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
        # Bound the wait and fail on HTTP errors, so an unreachable server or
        # an error page is reported instead of being parsed as CSV.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        df = pd.read_csv(StringIO(response.content.decode('utf-8')), header=None)
        # Column 1 holds the diagnosis letter, columns 2.. the features.
        X = df.iloc[:, 2:].values
        y = df.iloc[:, 1].map({'M': 1, 'B': 0}).values
        feature_names = [f"feature_{i}" for i in range(X.shape[1])]
        class_names = np.unique(y)
    else:
        raise ValueError("Invalid dataset name")

    # Create feature mapping dictionary
    feature_mapping = {
        f"f{i}": (f"hasF{i}", original_name)
        for i, original_name in enumerate(feature_names)
    }

    # Create class mapping dictionary
    class_mapping = {
        f"class: {i}": (f"Class_{i}", original_name)
        for i, original_name in enumerate(class_names)
    }

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize features (statistics come from the training part only)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test, feature_mapping, class_mapping, X.shape[1], scaler
def _generate_synthetic_samples(model, num_samples, feature_ranges):
    """Draw uniform random points inside the given ranges and label them with the model.

    Args:
        model: Trained classifier exposing ``predict_proba``.
        num_samples: Number of synthetic samples to generate.
        feature_ranges: One ``(low, high)`` tuple per feature, expressed in the
            same space the model was trained in.

    Returns:
        A tuple ``(samples, labels)``; ``labels`` are the argmax column indices
        of the predicted class probabilities.
    """
    lows = np.array([low for low, _ in feature_ranges], dtype=float)
    highs = np.array([high for _, high in feature_ranges], dtype=float)
    synthetic_samples = np.random.uniform(lows, highs, size=(num_samples, lows.size))
    synthetic_probs = model.predict_proba(synthetic_samples)
    synthetic_labels = np.argmax(synthetic_probs, axis=1)  # Convert probs to class labels.
    return synthetic_samples, synthetic_labels


def train_models(X_train, X_test, y_train, y_test, X_shape, scaler, num_synthetic_samples = None):
    """Train a neural network and a decision tree that imitates it.

    The network is fitted on ``X_train``. Synthetic points are then sampled
    uniformly inside the per-feature range of ``X_train``, labelled by the
    network, and used to fit a depth-5 decision tree.

    The synthetic points are drawn in the SAME (standardised) space as
    ``X_train``, because that is the space the network was fitted in. The
    tree's thresholds are therefore in standardised units, which is what
    ``tree_to_swrl`` expects when it inverse-transforms them with ``scaler``.

    Args:
        X_train: Training features, already scaled.
        X_test: Test features, scaled the same way.
        y_train: Training labels.
        y_test: Test labels.
        X_shape: Number of features.
        scaler: Scaler used to produce ``X_train``; returned unchanged.
        num_synthetic_samples: Number of synthetic points; defaults to
            ``len(X_train)``.

    Returns:
        A tuple ``(clf, mlp, X_synthetic, scaler)``: the fitted tree, the
        fitted network, the synthetic samples (standardised space) and the
        scaler.
    """
    # Train neural network with regularization
    mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=2000, random_state=42, alpha=0.01)
    mlp.fit(X_train, y_train)
    print(f'NN Test Accuracy: {mlp.score(X_test, y_test):.4f}')

    # Range of every feature as seen by the network (scaled training data).
    feature_ranges = [(X_train[:, i].min(), X_train[:, i].max()) for i in range(X_shape)]

    # Generate synthetic data
    if num_synthetic_samples is None:
        num_synthetic_samples = len(X_train)  # Number of synthetic samples
    X_synthetic, y_synthetic = _generate_synthetic_samples(mlp, num_synthetic_samples, feature_ranges)

    # Train decision tree on the synthetic data
    clf = DecisionTreeClassifier(max_depth=5, random_state=42)
    clf.fit(X_synthetic, y_synthetic)

    return clf, mlp, X_synthetic, scaler
# XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
def tree_to_swrl(tree, feature_names, class_mapping, scaler):
    """Turn every root-to-leaf path of a decision tree into one SWRL rule string.

    Args:
        tree: Object exposing the arrays ``children_left``, ``children_right``,
            ``feature``, ``threshold`` and ``value`` indexed by node id, with
            ``-1`` children marking a leaf (presumably the ``tree_`` attribute
            of a fitted scikit-learn tree; confirm against the caller).
        feature_names: Feature names, indexed by feature id; used to build the
            ``has<Name>`` property of each condition.
        class_mapping: Maps ``"class: <i>"`` to a tuple whose first element is
            the SWRL class name.
        scaler: Object whose ``inverse_transform`` maps a scaled feature row
            back to original units; applied to every threshold.

    Returns:
        A list of rule strings, one per leaf, in left-to-right leaf order.
        Conditions are emitted for every split on the path (no merging of
        repeated tests on the same feature) and sorted alphabetically.
    """
    n_features = len(feature_names)
    rules = []

    def to_original_scale(feature_index, threshold):
        # Inverse-transform a row that is zero except for the threshold and
        # read that feature's entry back.
        row = np.zeros((1, n_features))
        row[0, feature_index] = threshold
        return scaler.inverse_transform(row)[0][feature_index]

    def recurse(node, conditions):
        is_leaf = tree.children_left[node] == -1 and tree.children_right[node] == -1
        if is_leaf:
            # NOTE(review): argmax is a column index into tree.value; it equals
            # the class label only if the tree saw every class 0..k-1.
            predicted_class = np.argmax(tree.value[node])
            class_name = class_mapping[f"class: {predicted_class}"][0]
            # Joining the atoms in one list keeps the rule well-formed even
            # when the tree is a single leaf and there are no conditions.
            body = " ^ ".join(["Unclassified(?p)"] + sorted(conditions))
            rules.append(body + f' -> {class_name}(?p)')
            return

        feature_index = tree.feature[node]
        feature = feature_names[feature_index]
        var = f'?x{feature_index + 1}'
        # Inverse transform the threshold to the original scale
        threshold_original_scale = to_original_scale(feature_index, tree.threshold[node])
        property_atom = f'has{feature.capitalize().replace(" ", "")}(?p, {var})'

        left_condition = f'{property_atom} ^ swrlb:lessThanOrEqual({var}, {threshold_original_scale:.2f})'
        right_condition = f'{property_atom} ^ swrlb:greaterThan({var}, {threshold_original_scale:.2f})'

        recurse(tree.children_left[node], conditions + [left_condition])
        recurse(tree.children_right[node], conditions + [right_condition])

    recurse(0, [])
    return rules
# Driver: for each dataset, train the models, print the surrogate decision
# tree, the naming tables, and the SWRL rules derived from the tree.
dataset_names = ["digits", "wine", "iris", "breast_cancer"]
num_synthetic_samples = 40000  # Set to the desired number of samples

for dataset_name in dataset_names:
    print(f"\n---------- Dataset: {dataset_name} ----------\n")

    # Load and process data
    (
        X_train,
        X_test,
        y_train,
        y_test,
        feature_mapping,
        class_mapping,
        X_shape,
        scaler,
    ) = load_and_process_data(dataset_name)

    # Train the models
    clf, mlp, X_synthetic, scaler = train_models(
        X_train, X_test, y_train, y_test, X_shape, scaler, num_synthetic_samples
    )

    # Generic f0..fN names, shared by the tree printout and the SWRL export
    tree_feature_names = [f"f{i}" for i in range(X_synthetic.shape[1])]

    # Print decision tree
    print('\nExtracted Decision Tree Rules:\n')
    print(export_text(clf, feature_names=tree_feature_names))

    # Legend: tree name -> SWRL name -> name in the original dataset
    for key, (swrl_name, actual_name) in feature_mapping.items():
        print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of attribute in the original dataset);")
    print('\n')
    for key, (swrl_name, actual_name) in class_mapping.items():
        print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of class in the original dataset);")

    # Generate and print SWRL rules
    swrl_rules = tree_to_swrl(clf.tree_, tree_feature_names, class_mapping, scaler)
    print('\nGenerated SWRL Rules:\n')
    for rule in swrl_rules:
        print(rule)
----------
Dataset: digits ----------
NN Test
Accuracy: 0.9750
Extracted
Decision Tree Rules:
|--- f19 <= 6.83
| |--- f27 <= 7.28
| | |--- f60 <= 5.28
| | |
|--- f12 <= 7.96
| | |
| |--- f61 <= 9.77
| | |
| | |--- class: 7
| | |
| |--- f61 > 9.77
| | |
| | |--- class: 1
| | |
|--- f12 >
7.96
| | |
| |--- f61 <= 9.52
| | |
| | |--- class: 7
| | |
| |--- f61 > 9.52
| | |
| | |--- class: 7
| | |--- f60 > 5.28
| | |
|--- f10 <= 8.12
| | |
| |--- f28 <= 6.09
| | |
| | |--- class: 1
| | |
| |--- f28 > 6.09
| | |
| | |--- class: 1
| | |
|--- f10 >
8.12
| | |
| |--- f12 <= 7.31
| | |
| | |--- class: 1
| | |
| |--- f12 > 7.31
| | |
| | |--- class: 1
| |--- f27 > 7.28
| | |--- f10 <= 7.25
| | |
|--- f44 <= 8.04
| | |
| |--- f28 <= 9.55
| | |
| | |--- class: 1
| | |
| |--- f28 > 9.55
| | |
| | |--- class: 1
| | |
|--- f44 >
8.04
| | |
| |--- f5 <= 6.94
| | |
| | |--- class: 1
| | |
| |--- f5 > 6.94
| | |
| | |--- class: 1
| | |--- f10 > 7.25
| | |
|--- f43 <= 7.31
| | |
| |--- f21 <= 7.95
| | |
| | |--- class: 1
| | |
| |--- f21 > 7.95
| | |
| | |--- class: 9
| | |
|--- f43 >
7.31
| | |
| |--- f28 <= 7.52
| | |
| | |--- class: 1
| | |
| |--- f28 > 7.52
| | |
| | |--- class: 1
|--- f19 >
6.83
| |--- f10 <= 8.47
| | |--- f20 <= 6.57
| | |
|--- f28 <= 5.03
| | |
| |--- f27 <= 6.96
| | |
| | |--- class: 1
| | |
| |--- f27 > 6.96
| | |
| | |--- class: 1
| | |
|--- f28 >
5.03
| | |
| |--- f27 <= 7.38
| | |
| | |--- class: 1
| | |
| |--- f27 > 7.38
| | |
| | |--- class: 1
| | |--- f20 > 6.57
| | |
|--- f27 <= 3.87
| | |
| |--- f28 <= 8.47
| |
| | |
|--- class: 1
| | |
| |--- f28 > 8.47
| | |
| | |--- class: 1
| | |
|--- f27 >
3.87
| | |
| |--- f44 <= 7.34
| | |
| | |--- class: 1
| | |
| |--- f44 > 7.34
| | |
| | |--- class: 1
| |--- f10 > 8.47
| | |--- f20 <= 7.56
| | |
|--- f28 <= 6.24
| | |
| |--- f21 <= 10.07
| | |
| | |--- class: 1
| | |
| |--- f21 > 10.07
| | |
| | |--- class: 1
| | |
|--- f28 >
6.24
| | | |
|--- f44 <= 7.94
| | |
| | |--- class: 1
| | |
| |--- f44 > 7.94
| | |
| | |--- class: 1
| | |--- f20 > 7.56
| | |
|--- f43 <= 5.97
| | |
| |--- f44 <= 8.76
| | |
| | |--- class: 1
| | | |
|--- f44 >
8.76
| | |
| | |--- class: 1
| | |
|--- f43 >
5.97
| | |
| |--- f27 <= 5.72
| | |
| | |--- class: 1
| | |
| |--- f27 > 5.72
| | |
| | |--- class: 1
f0 (Decision tree);
hasF0 (SWRL rules); pixel_0 (name of attribute in the original dataset);
f1 (Decision tree);
hasF1 (SWRL rules); pixel_1 (name of attribute in the original dataset);
f2 (Decision tree);
hasF2 (SWRL rules); pixel_2 (name of attribute in the original dataset);
f3 (Decision tree);
hasF3 (SWRL rules); pixel_3 (name of attribute in the original dataset);
f4 (Decision tree);
hasF4 (SWRL rules); pixel_4 (name of attribute in the original dataset);
f5 (Decision tree);
hasF5 (SWRL rules); pixel_5 (name of attribute in the original dataset);
f6 (Decision tree);
hasF6 (SWRL rules); pixel_6 (name of attribute in the original dataset);
f7 (Decision tree);
hasF7 (SWRL rules); pixel_7 (name of attribute in the original dataset);
f8 (Decision tree);
hasF8 (SWRL rules); pixel_8 (name of attribute in the original dataset);
f9 (Decision tree);
hasF9 (SWRL rules); pixel_9 (name of attribute in the original dataset);
f10 (Decision
tree); hasF10 (SWRL rules); pixel_10 (name of attribute in the original dataset);
f11 (Decision
tree); hasF11 (SWRL rules); pixel_11 (name of attribute in the original dataset);
f12 (Decision
tree); hasF12 (SWRL rules); pixel_12 (name of attribute in the original dataset);
f13 (Decision
tree); hasF13 (SWRL rules); pixel_13 (name of attribute in the original dataset);
f14 (Decision
tree); hasF14 (SWRL rules); pixel_14 (name of attribute in the original dataset);
f15 (Decision
tree); hasF15 (SWRL rules); pixel_15 (name of attribute in the original dataset);
f16 (Decision
tree); hasF16 (SWRL rules); pixel_16 (name of attribute in the original dataset);
f17 (Decision
tree); hasF17 (SWRL rules); pixel_17 (name of attribute in the original dataset);
f18 (Decision
tree); hasF18 (SWRL rules); pixel_18 (name of attribute in the original dataset);
f19 (Decision tree);
hasF19 (SWRL rules); pixel_19 (name of attribute in the original dataset);
f20 (Decision
tree); hasF20 (SWRL rules); pixel_20 (name of attribute in the original dataset);
f21 (Decision
tree); hasF21 (SWRL rules); pixel_21 (name of attribute in the original dataset);
f22 (Decision
tree); hasF22 (SWRL rules); pixel_22 (name of attribute in the original dataset);
f23 (Decision
tree); hasF23 (SWRL rules); pixel_23 (name of attribute in the original dataset);
f24 (Decision
tree); hasF24 (SWRL rules); pixel_24 (name of attribute in the original dataset);
f25 (Decision
tree); hasF25 (SWRL rules); pixel_25 (name of attribute in the original dataset);
f26 (Decision
tree); hasF26 (SWRL rules); pixel_26 (name of attribute in the original dataset);
f27 (Decision
tree); hasF27 (SWRL rules); pixel_27 (name of attribute in the original dataset);
f28 (Decision
tree); hasF28 (SWRL rules); pixel_28 (name of attribute in the original dataset);
f29 (Decision
tree); hasF29 (SWRL rules); pixel_29 (name of attribute in the original dataset);
f30 (Decision
tree); hasF30 (SWRL rules); pixel_30 (name of attribute in the original dataset);
f31 (Decision
tree); hasF31 (SWRL rules); pixel_31 (name of attribute in the original dataset);
f32 (Decision
tree); hasF32 (SWRL rules); pixel_32 (name of attribute in the original dataset);
f33 (Decision
tree); hasF33 (SWRL rules); pixel_33 (name of attribute in the original dataset);
f34 (Decision
tree); hasF34 (SWRL rules); pixel_34 (name of attribute in the original dataset);
f35 (Decision
tree); hasF35 (SWRL rules); pixel_35 (name of attribute in the original dataset);
f36 (Decision
tree); hasF36 (SWRL rules); pixel_36 (name of attribute in the original dataset);
f37 (Decision
tree); hasF37 (SWRL rules); pixel_37 (name of attribute in the original dataset);
f38 (Decision
tree); hasF38 (SWRL rules); pixel_38 (name of attribute in the original dataset);
f39 (Decision
tree); hasF39 (SWRL rules); pixel_39 (name of attribute in the original dataset);
f40 (Decision
tree); hasF40 (SWRL rules); pixel_40 (name of attribute in the original dataset);
f41 (Decision
tree); hasF41 (SWRL rules); pixel_41 (name of attribute in the original dataset);
f42 (Decision
tree); hasF42 (SWRL rules); pixel_42 (name of attribute in the original dataset);
f43 (Decision
tree); hasF43 (SWRL rules); pixel_43 (name of attribute in the original dataset);
f44 (Decision
tree); hasF44 (SWRL rules); pixel_44 (name of attribute in the original dataset);
f45 (Decision
tree); hasF45 (SWRL rules); pixel_45 (name of attribute in the original dataset);
f46 (Decision
tree); hasF46 (SWRL rules); pixel_46 (name of attribute in the original dataset);
f47 (Decision
tree); hasF47 (SWRL rules); pixel_47 (name of attribute in the original dataset);
f48 (Decision
tree); hasF48 (SWRL rules); pixel_48 (name of attribute in the original dataset);
f49 (Decision
tree); hasF49 (SWRL rules); pixel_49 (name of attribute in the original dataset);
f50 (Decision
tree); hasF50 (SWRL rules); pixel_50 (name of attribute in the original dataset);
f51 (Decision
tree); hasF51 (SWRL rules); pixel_51 (name of attribute in the original dataset);
f52 (Decision
tree); hasF52 (SWRL rules); pixel_52 (name of attribute in the original dataset);
f53 (Decision
tree); hasF53 (SWRL rules); pixel_53 (name of attribute in the original dataset);
f54 (Decision
tree); hasF54 (SWRL rules); pixel_54 (name of attribute in the original dataset);
f55 (Decision
tree); hasF55 (SWRL rules); pixel_55 (name of attribute in the original dataset);
f56 (Decision
tree); hasF56 (SWRL rules); pixel_56 (name of attribute in the original dataset);
f57 (Decision
tree); hasF57 (SWRL rules); pixel_57 (name of attribute in the original dataset);
f58 (Decision
tree); hasF58 (SWRL rules); pixel_58 (name of attribute in the original dataset);
f59 (Decision
tree); hasF59 (SWRL rules); pixel_59 (name of attribute in the original dataset);
f60 (Decision
tree); hasF60 (SWRL rules); pixel_60 (name of attribute in the original dataset);
f61 (Decision
tree); hasF61 (SWRL rules); pixel_61 (name of attribute in the original dataset);
f62 (Decision
tree); hasF62 (SWRL rules); pixel_62 (name of attribute in the original dataset);
f63 (Decision
tree); hasF63 (SWRL rules); pixel_63 (name of attribute in the original dataset);
class: 0 (Decision
tree); Class_0 (SWRL rules); 0 (name of class in the original dataset);
class: 1 (Decision
tree); Class_1 (SWRL rules); 1 (name of class in the original dataset);
class: 2 (Decision
tree); Class_2 (SWRL rules); 2 (name of class in the original dataset);
class: 3 (Decision
tree); Class_3 (SWRL rules); 3 (name of class in the original dataset);
class: 4 (Decision
tree); Class_4 (SWRL rules); 4 (name of class in the original dataset);
class: 5 (Decision
tree); Class_5 (SWRL rules); 5 (name of class in the original dataset);
class: 6 (Decision
tree); Class_6 (SWRL rules); 6 (name of class in the original dataset);
class: 7 (Decision
tree); Class_7 (SWRL rules); 7 (name of class in the original dataset);
class: 8 (Decision
tree); Class_8 (SWRL rules); 8 (name of class in the original dataset);
class: 9 (Decision
tree); Class_9 (SWRL rules); 9 (name of class in the original dataset);
Generated
SWRL Rules:
Unclassified(?p)
^ hasF12(?p, ?x13) ^ swrlb:lessThanOrEqual(?x13, 48.22) ^ hasF19(?p, ?x20) ^ swrlb:lessThanOrEqual(?x20,
47.11) ^ hasF27(?p, ?x28) ^ swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF60(?p,
?x61) ^ swrlb:lessThanOrEqual(?x61, 37.66) ^ hasF61(?p, ?x62) ^
swrlb:lessThanOrEqual(?x62, 64.37) -> Class_7(?p)
Unclassified(?p)
^ hasF12(?p, ?x13) ^ swrlb:lessThanOrEqual(?x13, 48.22) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^
swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF60(?p, ?x61) ^
swrlb:lessThanOrEqual(?x61, 37.66) ^ hasF61(?p, ?x62) ^ swrlb:greaterThan(?x62,
64.37) -> Class_1(?p)
Unclassified(?p)
^ hasF12(?p, ?x13) ^ swrlb:greaterThan(?x13, 48.22) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^
swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF60(?p, ?x61) ^
swrlb:lessThanOrEqual(?x61, 37.66) ^ hasF61(?p, ?x62) ^
swrlb:lessThanOrEqual(?x62, 62.89) -> Class_7(?p)
Unclassified(?p)
^ hasF12(?p, ?x13) ^ swrlb:greaterThan(?x13, 48.22) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^
swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF60(?p, ?x61) ^
swrlb:lessThanOrEqual(?x61, 37.66) ^ hasF61(?p, ?x62) ^ swrlb:greaterThan(?x62,
62.89) -> Class_7(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 54.60) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^
swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF28(?p, ?x29) ^
swrlb:lessThanOrEqual(?x29, 47.41) ^ hasF60(?p, ?x61) ^ swrlb:greaterThan(?x61,
37.66) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 54.60) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^
swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF28(?p, ?x29) ^ swrlb:greaterThan(?x29,
47.41) ^ hasF60(?p, ?x61) ^ swrlb:greaterThan(?x61, 37.66) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 54.60) ^ hasF12(?p, ?x13) ^
swrlb:lessThanOrEqual(?x13, 45.16) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^
swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF60(?p, ?x61) ^ swrlb:greaterThan(?x61,
37.66) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 54.60) ^ hasF12(?p, ?x13) ^
swrlb:greaterThan(?x13, 45.16) ^ hasF19(?p, ?x20) ^ swrlb:lessThanOrEqual(?x20,
47.11) ^ hasF27(?p, ?x28) ^ swrlb:lessThanOrEqual(?x28, 52.01) ^ hasF60(?p,
?x61) ^ swrlb:greaterThan(?x61, 37.66) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28,
52.01) ^ hasF28(?p, ?x29) ^ swrlb:lessThanOrEqual(?x29, 68.77) ^ hasF44(?p,
?x45) ^ swrlb:lessThanOrEqual(?x45, 57.78) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28,
52.01) ^ hasF28(?p, ?x29) ^ swrlb:greaterThan(?x29, 68.77) ^ hasF44(?p, ?x45) ^
swrlb:lessThanOrEqual(?x45, 57.78) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28,
52.01) ^ hasF44(?p, ?x45) ^ swrlb:greaterThan(?x45, 57.78) ^ hasF5(?p, ?x6) ^
swrlb:lessThanOrEqual(?x6, 44.91) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28,
52.01) ^ hasF44(?p, ?x45) ^ swrlb:greaterThan(?x45, 57.78) ^ hasF5(?p, ?x6) ^
swrlb:greaterThan(?x6, 44.91) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF21(?p, ?x22) ^
swrlb:lessThanOrEqual(?x22, 57.35) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28,
52.01) ^ hasF43(?p, ?x44) ^ swrlb:lessThanOrEqual(?x44, 54.15) ->
Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF21(?p, ?x22) ^ swrlb:greaterThan(?x22,
57.35) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28, 52.01) ^ hasF43(?p, ?x44) ^
swrlb:lessThanOrEqual(?x44, 54.15) -> Class_9(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28,
52.01) ^ hasF28(?p, ?x29) ^ swrlb:lessThanOrEqual(?x29, 56.25) ^ hasF43(?p,
?x44) ^ swrlb:greaterThan(?x44, 54.15) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 49.87) ^ hasF19(?p, ?x20) ^
swrlb:lessThanOrEqual(?x20, 47.11) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28,
52.01) ^ hasF28(?p, ?x29) ^ swrlb:greaterThan(?x29, 56.25) ^ hasF43(?p, ?x44) ^
swrlb:greaterThan(?x44, 54.15) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
47.57) ^ hasF27(?p, ?x28) ^ swrlb:lessThanOrEqual(?x28, 50.11) ^ hasF28(?p,
?x29) ^ swrlb:lessThanOrEqual(?x29, 40.92) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
47.57) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28, 50.11) ^ hasF28(?p, ?x29) ^
swrlb:lessThanOrEqual(?x29, 40.92) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
47.57) ^ hasF27(?p, ?x28) ^ swrlb:lessThanOrEqual(?x28, 52.64) ^ hasF28(?p,
?x29) ^ swrlb:greaterThan(?x29, 40.92) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
47.57) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28, 52.64) ^ hasF28(?p, ?x29) ^
swrlb:greaterThan(?x29, 40.92) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21,
47.57) ^ hasF27(?p, ?x28) ^ swrlb:lessThanOrEqual(?x28, 31.84) ^ hasF28(?p,
?x29) ^ swrlb:lessThanOrEqual(?x29, 62.10) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^ swrlb:greaterThan(?x20,
47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21, 47.57) ^ hasF27(?p, ?x28) ^
swrlb:lessThanOrEqual(?x28, 31.84) ^ hasF28(?p, ?x29) ^ swrlb:greaterThan(?x29,
62.10) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21,
47.57) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28, 31.84) ^ hasF44(?p, ?x45) ^
swrlb:lessThanOrEqual(?x45, 53.38) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21,
47.57) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28, 31.84) ^ hasF44(?p, ?x45) ^
swrlb:greaterThan(?x45, 53.38) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
53.62) ^ hasF21(?p, ?x22) ^ swrlb:lessThanOrEqual(?x22, 70.54) ^ hasF28(?p,
?x29) ^ swrlb:lessThanOrEqual(?x29, 48.38) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
53.62) ^ hasF21(?p, ?x22) ^ swrlb:greaterThan(?x22, 70.54) ^ hasF28(?p, ?x29) ^
swrlb:lessThanOrEqual(?x29, 48.38) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
53.62) ^ hasF28(?p, ?x29) ^ swrlb:greaterThan(?x29, 48.38) ^ hasF44(?p, ?x45) ^
swrlb:lessThanOrEqual(?x45, 57.12) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:lessThanOrEqual(?x21,
53.62) ^ hasF28(?p, ?x29) ^ swrlb:greaterThan(?x29, 48.38) ^ hasF44(?p, ?x45) ^
swrlb:greaterThan(?x45, 57.12) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21,
53.62) ^ hasF43(?p, ?x44) ^ swrlb:lessThanOrEqual(?x44, 45.61) ^ hasF44(?p,
?x45) ^ swrlb:lessThanOrEqual(?x45, 62.23) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21,
53.62) ^ hasF43(?p, ?x44) ^ swrlb:lessThanOrEqual(?x44, 45.61) ^ hasF44(?p,
?x45) ^ swrlb:greaterThan(?x45, 62.23) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21,
53.62) ^ hasF27(?p, ?x28) ^ swrlb:lessThanOrEqual(?x28, 42.80) ^ hasF43(?p,
?x44) ^ swrlb:greaterThan(?x44, 45.61) -> Class_1(?p)
Unclassified(?p)
^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 56.53) ^ hasF19(?p, ?x20) ^
swrlb:greaterThan(?x20, 47.11) ^ hasF20(?p, ?x21) ^ swrlb:greaterThan(?x21,
53.62) ^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28, 42.80) ^ hasF43(?p, ?x44) ^
swrlb:greaterThan(?x44, 45.61) -> Class_1(?p)
----------
Dataset: iris ----------
NN Test
Accuracy: 1.0000
Extracted
Decision Tree Rules:
|--- f2 <= 2.25
| |--- f3 <= 1.35
| | |--- f3 <= 1.00
| | |
|--- f2 <= 2.07
| | |
| |--- f2 <= 1.89
| | |
| | |--- class: 1
| | |
| |--- f2 > 1.89
| | |
| | |--- class: 1
| | |
|--- f2 >
2.07
| | |
| |--- f3 <= 0.72
| | |
| | |--- class: 1
| | |
| |--- f3 > 0.72
| | |
| | |--- class: 2
| | |--- f3 > 1.00
| | |
|--- f2 <= 1.72
| | |
| |--- f1 <= 2.74
| | |
| | |--- class: 1
| | |
| |--- f1 > 2.74
| | |
| | |--- class: 1
| | |
|--- f2 >
1.72
| | |
| |--- f1 <= 3.75
| | |
| | |--- class: 2
| | |
| |--- f1 > 3.75
| | |
| | |--- class: 1
| |--- f3 > 1.35
| | |--- f2 <= 1.42
| | |
|--- f3 <= 1.83
| | |
| |--- f1 <= 3.11
| | |
| | |--- class: 2
| | |
| |--- f1 > 3.11
| | |
| | |--- class: 1
| | |
|--- f3 >
1.83
| | |
| |--- f1 <= 4.02
| | |
| | |--- class: 2
| | |
| |--- f1 > 4.02
| | |
| | |--- class: 2
| | |--- f2 > 1.42
| | |
|--- f3 <= 1.62
| | |
| |--- f1 <= 3.83
| | |
| | |--- class: 2
| | |
| |--- f1 > 3.83
| | |
| | |--- class: 2
| | |
|--- f3 >
1.62
| | |
| |--- f1 <= 4.40
| | |
| | |--- class: 2
| | |
| |--- f1 > 4.40
| | |
| | |--- class: 2
|--- f2 >
2.25
| |--- f2 <= 2.70
| | |--- f3 <= 0.63
| | |
|--- f1 <= 2.98
| | |
| |--- f3 <= 0.35
| | |
| | |--- class: 1
| | |
| |--- f3 > 0.35
| | |
| | |--- class: 2
| | |
|--- f1 >
2.98
| | |
| |--- f2 <= 2.61
| | |
| | |--- class: 1
| | |
| |--- f2 > 2.61
| | |
| | |--- class: 1
| | |--- f3 > 0.63
| | |
|--- f3 <= 0.83
| | |
| |--- f1 <= 3.76
| | |
| | |--- class: 2
| | |
| |--- f1 > 3.76
| | |
| | |--- class: 1
| | |
|--- f3 >
0.83
| | |
| |--- f3 <= 0.91
| | |
| | |--- class: 2
| | |
| |--- f3 > 0.91
| | |
| | |--- class: 2
| |--- f2 > 2.70
| | |--- f2 <= 3.00
| | |
|--- f3 <= 0.38
| | |
| |--- f1 <= 3.53
| | |
| | |--- class: 2
| | |
| |--- f1 > 3.53
| | |
| | |--- class: 1
| | |
|--- f3 >
0.38
| | |
| |--- f3 <= 0.48
| | |
| | |--- class: 2
| | |
| |--- f3 > 0.48
| | |
| | |--- class: 2
| | |--- f2 > 3.00
| | |
|--- f2 <= 3.08
| | |
| |--- f3 <= 0.23
| | |
| | |--- class: 2
| | |
| |--- f3 > 0.23
| | |
| | |--- class: 2
| | |
|--- f2 >
3.08
| | |
| |--- f3 <= 0.12
| | |
| | |--- class: 2
| | |
| |--- f3 > 0.12
| | |
| | |--- class: 2
f0 (Decision tree); hasF0 (SWRL
rules); sepal length (cm) (name of attribute in the original dataset);
f1 (Decision tree); hasF1 (SWRL
rules); sepal width (cm) (name of attribute in the original dataset);
f2 (Decision tree); hasF2 (SWRL
rules); petal length (cm) (name of attribute in the original dataset);
f3 (Decision tree); hasF3 (SWRL
rules); petal width (cm) (name of attribute in the original dataset);
class: 0 (Decision tree); Class_0 (SWRL
rules); setosa (name of class in the original dataset);
class: 1 (Decision tree); Class_1
(SWRL rules); versicolor (name of class in the original dataset);
class: 2 (Decision tree); Class_2
(SWRL rules); virginica (name of class in the original dataset);
Generated
SWRL Rules:
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 7.02) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 7.34) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_0(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.02) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 7.34) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_0(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.34) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 7.66) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x4,
1.72) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_0(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.34) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4,
1.72) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.29) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 6.74) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.29) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 6.74) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.74) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 6.74) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.74) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 6.74) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.94) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.20) -> Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.45) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.55) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.45) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.55) -> Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.86) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 2.55) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.86) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 2.55) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.77) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.39) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.77) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 2.39) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 5.03) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 2.39) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 5.03) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 6.21) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
7.66) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.20) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 2.39) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.39) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
8.44) ^ hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x4, 1.44) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.65) -> Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.39) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
8.44) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.44) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.65) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.39) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
8.27) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 8.44) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.65) -> Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.39) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.27) ^
hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 8.44) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.65) -> Class_0(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.75) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
8.44) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.65) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.81) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.75) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
8.44) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.65) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.81) -> Class_0(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 8.44) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4,
1.65) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.81) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.86) -> Class_1(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:lessThanOrEqual(?x3, 8.44) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4,
1.65) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.81) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 1.86) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:lessThanOrEqual(?x2, 4.64) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.44) ^
hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 8.95) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.47) -> Class_1(?p)
Unclassified(?p)
^ hasF1(?p, ?x2) ^ swrlb:greaterThan(?x2, 4.64) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.44) ^
hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 8.95) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.47) -> Class_0(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 8.44) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
8.95) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.47) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.54) -> Class_1(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 8.44) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3,
8.95) ^ hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 1.47) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 1.54) -> Class_1(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 8.44) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.95) ^
hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 9.11) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.35) -> Class_1(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 8.44) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.95) ^
hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 9.11) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 1.35) -> Class_1(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 8.44) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.95) ^
hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 9.11) ^ hasF3(?p, ?x4) ^
swrlb:lessThanOrEqual(?x4, 1.27) -> Class_1(?p)
Unclassified(?p)
^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.66) ^ hasF2(?p, ?x3) ^
swrlb:greaterThan(?x3, 8.44) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.95) ^
hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 9.11) ^ hasF3(?p, ?x4) ^
swrlb:greaterThan(?x4, 1.27) -> Class_1(?p)
EXPERIMENT with Shallow Decision Tree
(FORMER UNVERIFIED DRAFT!
Without rules-optimization):
import numpy as np
import pandas as pd
from sklearn.datasets
import load_digits, load_wine,
load_iris
from sklearn.model_selection
import train_test_split
from sklearn.preprocessing
import StandardScaler
from sklearn.neural_network
import MLPClassifier
from sklearn.tree
import DecisionTreeClassifier,
export_text
import torch
import re
import requests
from io import StringIO
def load_and_process_data(dataset_name):
    """Load a dataset, build the naming dictionaries and standardize the features.

    Args:
        dataset_name: One of "digits", "wine", "iris" or "breast_cancer".
            The first three use the scikit-learn loaders imported by this
            file; "breast_cancer" is downloaded as CSV from the UCI URL below.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test, feature_mapping,
        class_mapping, n_features, scaler)`` where

        * ``X_train`` / ``X_test`` are standardized with ``scaler`` (fitted on
          the training split only),
        * ``feature_mapping`` maps ``"f<i>"`` to ``("hasF<i>", original name)``,
        * ``class_mapping`` maps ``"class: <i>"`` to ``("Class_<i>", original name)``,
        * ``n_features`` is the number of feature columns,
        * ``scaler`` is the fitted ``StandardScaler``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
        requests.RequestException: If the "breast_cancer" download times out
            or the server answers with an error status.
    """
    if dataset_name == "digits":
        digits = load_digits()
        X = digits.data
        y = digits.target
        feature_names = [f"pixel_{i}" for i in range(X.shape[1])]
        class_names = digits.target_names
    elif dataset_name == "wine":
        wine = load_wine()
        X = wine.data
        y = wine.target
        feature_names = wine.feature_names
        class_names = wine.target_names
    elif dataset_name == "iris":
        iris = load_iris()
        X = iris.data
        y = iris.target
        feature_names = iris.feature_names
        class_names = iris.target_names
    elif dataset_name == "breast_cancer":
        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
        # Bound the wait and fail loudly on HTTP errors; otherwise a stalled
        # connection hangs the run and an error page gets parsed as CSV.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        df = pd.read_csv(StringIO(response.content.decode('utf-8')), header=None)
        # Column 1 holds the M/B label; columns 2 onward are used as features.
        X = df.iloc[:, 2:].values
        y = df.iloc[:, 1].map({'M': 1, 'B': 0}).values
        feature_names = [f"feature_{i}" for i in range(X.shape[1])]
        class_names = np.unique(y)
    else:
        raise ValueError("Invalid dataset name")

    # Create feature mapping dictionary
    feature_mapping = {
        f"f{i}": (f"hasF{i}", name) for i, name in enumerate(feature_names)
    }

    # Create class mapping dictionary
    class_mapping = {
        f"class: {i}": (f"Class_{i}", name) for i, name in enumerate(class_names)
    }

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Normalize features (statistics come from the training split only)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test, feature_mapping, class_mapping, X.shape[1], scaler
def train_models(X_train, X_test, y_train, y_test, X_shape, scaler, num_synthetic_samples=None):
    """Train the neural network and a surrogate decision tree that mimics it.

    The network is fitted on the standardized training data. Synthetic
    samples are then drawn uniformly inside the per-feature range of the
    training data, labelled by the network, and used to fit a depth-2
    decision tree in the original (unscaled) feature units.

    Args:
        X_train: Standardized training features.
        X_test: Standardized test features (used only to print the accuracy).
        y_train: Training labels.
        y_test: Test labels.
        X_shape: Number of features. Kept for interface compatibility; the
            feature count is taken from ``X_train`` itself.
        scaler: The fitted scaler that produced ``X_train`` / ``X_test``.
        num_synthetic_samples: How many synthetic samples to draw. Defaults
            to ``len(X_train)`` when ``None``.

    Returns:
        A tuple ``(clf, mlp, X_synthetic_original_scale, scaler)``: the fitted
        decision tree, the fitted network, the synthetic samples in original
        feature units, and the scaler passed in.
    """
    # Train neural network with regularization
    mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=2000, random_state=42, alpha=0.01)
    mlp.fit(X_train, y_train)
    print(f'NN Test Accuracy: {mlp.score(X_test, y_test):.4f}')

    # ---------- Refined Stage 2: Generate Synthetic Samples ----------
    def generate_synthetic_samples(model, num_samples, feature_ranges):
        """Draw uniform samples in original units and label them with the network.

        Args:
            model: Trained neural network model (MLPClassifier).
            num_samples: Number of synthetic samples to generate.
            feature_ranges: A list of ``(min, max)`` tuples, one per feature,
                expressed in original (unscaled) units.

        Returns:
            A tuple containing the synthetic samples (original units) and
            the labels the network predicts for them.
        """
        bounds = np.asarray(feature_ranges, dtype=float)
        samples = np.random.uniform(
            bounds[:, 0], bounds[:, 1], size=(num_samples, bounds.shape[0])
        )
        # The network was trained on standardized inputs, so the samples
        # must go through the same scaler before being classified.
        labels = model.predict(scaler.transform(samples))
        return samples, labels

    # Define the range of features based on the original training data.
    # Undo the standardization once for the whole matrix, then take the
    # column-wise extremes.
    X_train_original_scale = scaler.inverse_transform(X_train)
    feature_ranges = list(
        zip(X_train_original_scale.min(axis=0), X_train_original_scale.max(axis=0))
    )

    # Generate synthetic data
    if num_synthetic_samples is None:
        num_synthetic_samples = len(X_train)  # Number of synthetic samples
    X_synthetic_original_scale, y_synthetic = generate_synthetic_samples(
        mlp, num_synthetic_samples, feature_ranges
    )
    # ---------- End of Refined Stage 2 ----------

    # The samples are already in original units: no further inverse
    # transform is applied, so the tree thresholds stay inside the real
    # feature ranges.

    # Train decision tree on the synthetic data
    clf = DecisionTreeClassifier(max_depth=2, random_state=42)
    clf.fit(X_synthetic_original_scale, y_synthetic)

    return clf, mlp, X_synthetic_original_scale, scaler
#
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
def tree_to_swrl(tree, feature_names, class_mapping, classes=None):
    """Convert a decision-tree structure into SWRL rule strings.

    One rule is produced per leaf. Its body is ``Unclassified(?p)`` followed
    by every split condition on the path from the root to that leaf (sorted
    as strings, not simplified), and its head is the SWRL class of the leaf.

    Args:
        tree: Low-level tree object (e.g. ``clf.tree_``) exposing the
            ``children_left``, ``children_right``, ``feature``, ``threshold``
            and ``value`` arrays.
        feature_names: Name of each feature index. The name is capitalized
            and stripped of spaces to form the ``has<Name>`` property.
        class_mapping: Dictionary keyed by ``"class: <label>"`` whose values
            are tuples with the SWRL class name in first position.
        classes: Optional sequence giving the class label of each column of
            ``tree.value`` (e.g. ``clf.classes_``). When omitted, the column
            index itself is used as the label, which is only correct if the
            tree was fitted on labels ``0..k-1`` with none missing.

    Returns:
        The list of rule strings, ordered by left-to-right leaf position.
    """
    rules = []

    def recurse(node, conditions):
        left_child = tree.children_left[node]
        right_child = tree.children_right[node]

        if left_child == -1 and right_child == -1:
            column = int(np.argmax(tree.value[node]))
            label = column if classes is None else classes[column]
            class_name = class_mapping[f"class: {label}"][0]
            # Join all atoms at once so a tree made of a single leaf yields
            # "Unclassified(?p) -> C(?p)" rather than a dangling " ^ ".
            atoms = ["Unclassified(?p)"] + sorted(conditions)
            rules.append(" ^ ".join(atoms) + f' -> {class_name}(?p)')
            return

        feature_index = tree.feature[node]
        threshold = tree.threshold[node]
        var = f'?x{feature_index + 1}'
        prop = f'has{feature_names[feature_index].capitalize().replace(" ", "")}(?p, {var})'

        left_condition = f'{prop} ^ swrlb:lessThanOrEqual({var}, {threshold:.2f})'
        right_condition = f'{prop} ^ swrlb:greaterThan({var}, {threshold:.2f})'

        recurse(left_child, conditions + [left_condition])
        recurse(right_child, conditions + [right_condition])

    recurse(0, [])
    return rules
# Driver: for every dataset, train the network and its surrogate tree, then
# print the tree, the naming tables and the SWRL rules derived from the tree.
dataset_names = ["digits", "wine", "iris", "breast_cancer"]
num_synthetic_samples = 40000  # Set to the desired number of samples

for dataset_name in dataset_names:
    print(f"\n---------- Dataset: {dataset_name} ----------\n")

    # Load and process data
    X_train, X_test, y_train, y_test, feature_mapping, class_mapping, X_shape, scaler = load_and_process_data(dataset_name)

    # Train the models
    clf, mlp, X_synthetic, scaler = train_models(X_train, X_test, y_train, y_test, X_shape, scaler, num_synthetic_samples)

    # Same short feature names for the tree dump and for the SWRL rules
    tree_feature_names = [f"f{i}" for i in range(X_synthetic.shape[1])]

    # Print decision tree
    print('\nExtracted Decision Tree Rules:\n')
    print(export_text(clf, feature_names=tree_feature_names))

    for key, (swrl_name, actual_name) in feature_mapping.items():
        print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of attribute in the original dataset);")
    print('\n')
    for key, (swrl_name, actual_name) in class_mapping.items():
        print(f"{key} (Decision tree); {swrl_name} (SWRL rules); {actual_name} (name of class in the original dataset);")

    # The rule generator looks classes up by their column position in the
    # tree's value array. When the synthetic labels cover only some classes,
    # that position differs from the label, so re-key the mapping by position
    # using clf.classes_ to keep the rules consistent with the tree dump.
    leaf_class_mapping = {
        f"class: {column}": class_mapping[f"class: {label}"]
        for column, label in enumerate(clf.classes_)
    }

    # Generate and print SWRL rules
    swrl_rules = tree_to_swrl(clf.tree_, tree_feature_names, leaf_class_mapping)
    print('\nGenerated SWRL Rules:\n')
    for rule in swrl_rules:
        print(rule)
----------
Dataset: digits ----------
NN Test
Accuracy: 0.9750
Extracted
Decision Tree Rules:
|--- f19 <= 56.53
| |--- f27 <= 51.68
| | |--- class: 1
| |--- f27 > 51.68
| | |--- class: 1
|--- f19 > 56.53
| |--- f10 <= 57.96
| | |--- class: 1
| |--- f10 > 57.96
| | |--- class: 1
f0 (Decision tree); hasF0 (SWRL
rules); pixel_0 (name of attribute in the original dataset);
f1 (Decision tree); hasF1 (SWRL
rules); pixel_1 (name of attribute in the original dataset);
f2 (Decision tree); hasF2 (SWRL
rules); pixel_2 (name of attribute in the original dataset);
f3 (Decision tree); hasF3 (SWRL
rules); pixel_3 (name of attribute in the original dataset);
f4 (Decision tree); hasF4 (SWRL
rules); pixel_4 (name of attribute in the original dataset);
f5 (Decision tree); hasF5 (SWRL
rules); pixel_5 (name of attribute in the original dataset);
f6 (Decision tree); hasF6 (SWRL
rules); pixel_6 (name of attribute in the original dataset);
f7 (Decision tree); hasF7 (SWRL
rules); pixel_7 (name of attribute in the original dataset);
f8 (Decision tree); hasF8 (SWRL
rules); pixel_8 (name of attribute in the original dataset);
f9 (Decision tree); hasF9 (SWRL
rules); pixel_9 (name of attribute in the original dataset);
f10 (Decision tree); hasF10 (SWRL
rules); pixel_10 (name of attribute in the original dataset);
f11 (Decision tree); hasF11 (SWRL
rules); pixel_11 (name of attribute in the original dataset);
f12 (Decision tree); hasF12 (SWRL
rules); pixel_12 (name of attribute in the original dataset);
f13 (Decision tree); hasF13 (SWRL
rules); pixel_13 (name of attribute in the original dataset);
f14 (Decision tree); hasF14 (SWRL
rules); pixel_14 (name of attribute in the original dataset);
f15 (Decision tree); hasF15 (SWRL
rules); pixel_15 (name of attribute in the original dataset);
f16 (Decision tree); hasF16 (SWRL
rules); pixel_16 (name of attribute in the original dataset);
f17 (Decision tree); hasF17 (SWRL
rules); pixel_17 (name of attribute in the original dataset);
f18 (Decision tree); hasF18 (SWRL
rules); pixel_18 (name of attribute in the original dataset);
f19 (Decision tree); hasF19 (SWRL
rules); pixel_19 (name of attribute in the original dataset);
f20 (Decision tree); hasF20 (SWRL
rules); pixel_20 (name of attribute in the original dataset);
f21 (Decision tree); hasF21 (SWRL
rules); pixel_21 (name of attribute in the original dataset);
f22 (Decision tree); hasF22 (SWRL
rules); pixel_22 (name of attribute in the original dataset);
f23 (Decision tree); hasF23 (SWRL
rules); pixel_23 (name of attribute in the original dataset);
f24 (Decision tree); hasF24 (SWRL
rules); pixel_24 (name of attribute in the original dataset);
f25 (Decision tree); hasF25 (SWRL
rules); pixel_25 (name of attribute in the original dataset);
f26 (Decision tree); hasF26 (SWRL
rules); pixel_26 (name of attribute in the original dataset);
f27 (Decision tree); hasF27 (SWRL
rules); pixel_27 (name of attribute in the original dataset);
f28 (Decision tree); hasF28 (SWRL
rules); pixel_28 (name of attribute in the original dataset);
f29 (Decision tree); hasF29 (SWRL
rules); pixel_29 (name of attribute in the original dataset);
f30 (Decision tree); hasF30 (SWRL
rules); pixel_30 (name of attribute in the original dataset);
f31 (Decision tree); hasF31 (SWRL
rules); pixel_31 (name of attribute in the original dataset);
f32 (Decision tree); hasF32 (SWRL
rules); pixel_32 (name of attribute in the original dataset);
f33 (Decision tree); hasF33 (SWRL
rules); pixel_33 (name of attribute in the original dataset);
f34 (Decision tree); hasF34 (SWRL
rules); pixel_34 (name of attribute in the original dataset);
f35 (Decision tree); hasF35 (SWRL
rules); pixel_35 (name of attribute in the original dataset);
f36 (Decision tree); hasF36 (SWRL
rules); pixel_36 (name of attribute in the original dataset);
f37 (Decision tree); hasF37 (SWRL
rules); pixel_37 (name of attribute in the original dataset);
f38 (Decision tree); hasF38 (SWRL
rules); pixel_38 (name of attribute in the original dataset);
f39 (Decision tree); hasF39 (SWRL
rules); pixel_39 (name of attribute in the original dataset);
f40 (Decision tree); hasF40 (SWRL
rules); pixel_40 (name of attribute in the original dataset);
f41 (Decision tree); hasF41 (SWRL
rules); pixel_41 (name of attribute in the original dataset);
f42 (Decision tree); hasF42 (SWRL
rules); pixel_42 (name of attribute in the original dataset);
f43 (Decision tree); hasF43 (SWRL
rules); pixel_43 (name of attribute in the original dataset);
f44 (Decision tree); hasF44 (SWRL
rules); pixel_44 (name of attribute in the original dataset);
f45 (Decision tree); hasF45 (SWRL
rules); pixel_45 (name of attribute in the original dataset);
f46 (Decision tree); hasF46 (SWRL
rules); pixel_46 (name of attribute in the original dataset);
f47 (Decision tree); hasF47 (SWRL
rules); pixel_47 (name of attribute in the original dataset);
f48 (Decision tree); hasF48 (SWRL
rules); pixel_48 (name of attribute in the original dataset);
f49 (Decision tree); hasF49 (SWRL
rules); pixel_49 (name of attribute in the original dataset);
f50 (Decision tree); hasF50 (SWRL
rules); pixel_50 (name of attribute in the original dataset);
f51 (Decision tree); hasF51 (SWRL
rules); pixel_51 (name of attribute in the original dataset);
f52 (Decision tree); hasF52 (SWRL
rules); pixel_52 (name of attribute in the original dataset);
f53 (Decision tree); hasF53 (SWRL
rules); pixel_53 (name of attribute in the original dataset);
f54 (Decision tree); hasF54 (SWRL
rules); pixel_54 (name of attribute in the original dataset);
f55 (Decision tree); hasF55 (SWRL
rules); pixel_55 (name of attribute in the original dataset);
f56 (Decision tree); hasF56 (SWRL
rules); pixel_56 (name of attribute in the original dataset);
f57 (Decision tree); hasF57 (SWRL
rules); pixel_57 (name of attribute in the original dataset);
f58 (Decision tree); hasF58 (SWRL
rules); pixel_58 (name of attribute in the original dataset);
f59 (Decision tree); hasF59 (SWRL
rules); pixel_59 (name of attribute in the original dataset);
f60 (Decision tree); hasF60 (SWRL
rules); pixel_60 (name of attribute in the original dataset);
f61 (Decision tree); hasF61 (SWRL
rules); pixel_61 (name of attribute in the original dataset);
f62 (Decision tree); hasF62 (SWRL
rules); pixel_62 (name of attribute in the original dataset);
f63 (Decision tree); hasF63 (SWRL
rules); pixel_63 (name of attribute in the original dataset);
class: 0 (Decision tree); Class_0
(SWRL rules); 0 (name of class in the original dataset);
class: 1 (Decision tree); Class_1
(SWRL rules); 1 (name of class in the original dataset);
class: 2 (Decision tree); Class_2
(SWRL rules); 2 (name of class in the original dataset);
class: 3 (Decision tree); Class_3
(SWRL rules); 3 (name of class in the original dataset);
class: 4 (Decision tree); Class_4
(SWRL rules); 4 (name of class in the original dataset);
class: 5 (Decision tree); Class_5
(SWRL rules); 5 (name of class in the original dataset);
class: 6 (Decision tree); Class_6
(SWRL rules); 6 (name of class in the original dataset);
class: 7 (Decision tree); Class_7
(SWRL rules); 7 (name of class in the original dataset);
class: 8 (Decision tree); Class_8
(SWRL rules); 8 (name of class in the original dataset);
class: 9 (Decision tree); Class_9
(SWRL rules); 9 (name of class in the original dataset);
Generated
SWRL Rules:
Unclassified(?p) ^ hasF19(?p, ?x20) ^ swrlb:lessThanOrEqual(?x20, 56.53)
^ hasF27(?p, ?x28) ^ swrlb:lessThanOrEqual(?x28, 51.68) -> Class_1(?p)
Unclassified(?p) ^ hasF19(?p, ?x20) ^ swrlb:lessThanOrEqual(?x20, 56.53)
^ hasF27(?p, ?x28) ^ swrlb:greaterThan(?x28, 51.68) -> Class_1(?p)
Unclassified(?p) ^ hasF10(?p, ?x11) ^ swrlb:lessThanOrEqual(?x11, 57.96)
^ hasF19(?p, ?x20) ^ swrlb:greaterThan(?x20, 56.53) -> Class_1(?p)
Unclassified(?p) ^ hasF10(?p, ?x11) ^ swrlb:greaterThan(?x11, 57.96) ^
hasF19(?p, ?x20) ^ swrlb:greaterThan(?x20, 56.53) -> Class_1(?p)
----------
Dataset: iris ----------
NN Test
Accuracy: 1.0000
Extracted
Decision Tree Rules:
|--- f2 <= 7.75
| |--- f3 <= 2.09
| |
|--- class: 1
| |--- f3 > 2.09
| |
|--- class: 2
|--- f2 > 7.75
| |--- f2 <= 8.47
| |
|--- class: 2
| |--- f2 > 8.47
| |
|--- class: 2
f0 (Decision tree); hasF0 (SWRL
rules); sepal length (cm) (name of attribute in the original dataset);
f1 (Decision tree); hasF1 (SWRL
rules); sepal width (cm) (name of attribute in the original dataset);
f2 (Decision tree); hasF2 (SWRL
rules); petal length (cm) (name of attribute in the original dataset);
f3 (Decision tree); hasF3 (SWRL
rules); petal width (cm) (name of attribute in the original dataset);
class: 0 (Decision tree); Class_0
(SWRL rules); setosa (name of class in the original dataset);
class: 1 (Decision tree); Class_1
(SWRL rules); versicolor (name of class in the original dataset);
class: 2 (Decision tree); Class_2
(SWRL rules); virginica (name of class in the original dataset);
Generated
SWRL Rules:
Unclassified(?p) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 7.75) ^
hasF3(?p, ?x4) ^ swrlb:lessThanOrEqual(?x4, 2.09) -> Class_0(?p)
Unclassified(?p) ^ hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 7.75) ^
hasF3(?p, ?x4) ^ swrlb:greaterThan(?x4, 2.09) -> Class_1(?p)
Unclassified(?p) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.75) ^
hasF2(?p, ?x3) ^ swrlb:lessThanOrEqual(?x3, 8.47) -> Class_1(?p)
Unclassified(?p) ^ hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 7.75) ^
hasF2(?p, ?x3) ^ swrlb:greaterThan(?x3, 8.47) -> Class_1(?p)
-------------------------------------------------------------------------------------