lopezkor000 committed on
Commit
c394ae1
·
1 Parent(s): e9bd732
Files changed (5) hide show
  1. Dockerfile +29 -0
  2. README.md +74 -13
  3. app.py +51 -0
  4. requirements.txt +3 -0
  5. templates/index.html +251 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies; skip recommended extras to keep the image small
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is reused when only code changes
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files.
# NOTE: no static/ directory is copied: templates/index.html inlines its CSS
# and JS, and COPY aborts the build when its source path does not exist.
COPY app.py .
COPY templates/ templates/

# Expose the port the Flask server listens on
EXPOSE 7860

# app.py reads PORT (falling back to 7860) and binds to 0.0.0.0 itself
ENV PORT=7860

# Run the application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,13 +1,74 @@
1
- ---
2
- title: Llm Projec 2
3
- emoji: 🐨
4
- colorFrom: red
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.49.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prompt Safety Checker Web App
2
+
3
+ A Flask web application that uses the `viccon23/STU-Injection-aegis` Hugging Face model to classify prompts as safe or unsafe.
4
+
5
+ ## Features
6
+
7
+ - 🛡️ Real-time prompt safety classification
8
+ - 🎨 Modern, responsive UI
9
+ - 📊 Confidence score display
10
+ - ⚡ Fast inference with PyTorch
11
+
12
+ ## Installation
13
+
14
+ 1. Install dependencies:
15
+ ```bash
16
+ pip install -r requirements.txt
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ### Local Development
22
+
23
+ 1. Start the web server:
24
+ ```bash
25
+ python app.py
26
+ ```
27
+
28
+ 2. Open your browser and navigate to:
29
+ ```
30
+ http://localhost:7860
31
+ ```
32
+
33
+ 3. Enter a prompt in the text area and click "Check Safety"
34
+
35
+ ### Docker Deployment
36
+
37
+ Build and run with Docker:
38
+ ```bash
39
+ docker build -t prompt-safety-checker .
40
+ docker run -p 7860:7860 prompt-safety-checker
41
+ ```
42
+
43
+ ### Hugging Face Spaces Deployment
44
+
45
+ 1. Create a new Docker Space on Hugging Face
46
+ 2. Upload the following files:
47
+ - `Dockerfile`
48
+ - `app.py`
49
+ - `requirements.txt`
50
+ - `templates/index.html`
51
+ 3. The app will automatically deploy on port 7860
52
+
53
+ ## Model
54
+
55
+ This app uses the [STU-Injection-aegis](https://huggingface.co/viccon23/STU-Injection-aegis) model from Hugging Face, which is designed to detect potentially unsafe or malicious prompts that could be used for injection attacks.
56
+
57
+ ## API Endpoint
58
+
59
+ You can also use the API directly:
60
+
61
+ ```bash
62
+ curl -X POST http://localhost:7860/classify \
63
+ -H "Content-Type: application/json" \
64
+ -d '{"prompt": "Your prompt here"}'
65
+ ```
66
+
67
+ Response:
68
+ ```json
69
+ {
70
+ "label": "Safe",
71
+ "confidence": 95.32,
72
+ "predicted_class": 0
73
+ }
74
+ ```
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
4
+
5
+ app = Flask(__name__)
6
+
7
+ # Load model and tokenizer
8
+ MODEL_NAME = "viccon23/STU-Injection-aegis"
9
+ print("Loading model...")
10
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
11
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
12
+ print("Model loaded successfully!")
13
+
14
def classify_prompt(text):
    """Run the sequence classifier on ``text`` and summarise its verdict.

    Returns a dict with three keys: ``label`` ("Safe" or "Unsafe"),
    ``confidence`` (softmax probability of the winning class, expressed as a
    percentage rounded to two decimals) and ``predicted_class`` (the index of
    the winning class).
    """
    # Inputs longer than 512 tokens are truncated by the tokenizer.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        predicted_class = probabilities.argmax(dim=-1).item()
        confidence = probabilities[0][predicted_class].item()

    # NOTE(review): index 0 is treated as the safe class and anything else as
    # unsafe; this is an assumption about the model's label order - confirm.
    if predicted_class == 0:
        label = "Safe"
    else:
        label = "Unsafe"

    percent = round(confidence * 100, 2)
    return {
        "label": label,
        "confidence": percent,
        "predicted_class": predicted_class,
    }
32
+
33
@app.route('/')
def index():
    """Serve the single-page UI rendered from ``index.html``."""
    page = render_template('index.html')
    return page
36
+
37
@app.route('/classify', methods=['POST'])
def classify():
    """Classify the prompt supplied in a JSON request body.

    Expects a JSON object of the form ``{"prompt": "<text>"}``.

    Returns:
        The JSON-encoded result of ``classify_prompt`` on success, or
        ``{"error": "No prompt provided"}`` with HTTP 400 when the body is
        missing, is not valid JSON, is not a JSON object, or does not carry a
        non-blank string under ``prompt``.
    """
    # silent=True makes a missing or malformed JSON body come back as None
    # instead of raising, so every bad request takes the same 400 path below.
    payload = request.get_json(silent=True)

    # A valid JSON body may still be a list, string, number or null.
    prompt = payload.get('prompt') if isinstance(payload, dict) else None

    # Reject non-string and whitespace-only prompts before they reach the tokenizer.
    if not isinstance(prompt, str) or not prompt.strip():
        return jsonify({"error": "No prompt provided"}), 400

    return jsonify(classify_prompt(prompt))
47
+
48
if __name__ == '__main__':
    # Script entry point: listen on every interface, taking the port from the
    # PORT environment variable and falling back to 7860 when it is unset.
    import os
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port, debug=False)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ flask==3.0.0
2
+ transformers==4.36.0
3
+ torch==2.1.0
templates/index.html ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Prompt Safety Checker</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ body {
15
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
16
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
17
+ min-height: 100vh;
18
+ display: flex;
19
+ justify-content: center;
20
+ align-items: center;
21
+ padding: 20px;
22
+ }
23
+
24
+ .container {
25
+ background: white;
26
+ border-radius: 20px;
27
+ box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
28
+ padding: 40px;
29
+ max-width: 700px;
30
+ width: 100%;
31
+ }
32
+
33
+ h1 {
34
+ color: #333;
35
+ margin-bottom: 10px;
36
+ text-align: center;
37
+ }
38
+
39
+ .subtitle {
40
+ color: #666;
41
+ text-align: center;
42
+ margin-bottom: 30px;
43
+ font-size: 14px;
44
+ }
45
+
46
+ .input-group {
47
+ margin-bottom: 20px;
48
+ }
49
+
50
+ label {
51
+ display: block;
52
+ margin-bottom: 10px;
53
+ color: #555;
54
+ font-weight: 600;
55
+ }
56
+
57
+ textarea {
58
+ width: 100%;
59
+ padding: 15px;
60
+ border: 2px solid #e0e0e0;
61
+ border-radius: 10px;
62
+ font-size: 16px;
63
+ font-family: inherit;
64
+ resize: vertical;
65
+ min-height: 150px;
66
+ transition: border-color 0.3s;
67
+ }
68
+
69
+ textarea:focus {
70
+ outline: none;
71
+ border-color: #667eea;
72
+ }
73
+
74
+ button {
75
+ width: 100%;
76
+ padding: 15px;
77
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
78
+ color: white;
79
+ border: none;
80
+ border-radius: 10px;
81
+ font-size: 18px;
82
+ font-weight: 600;
83
+ cursor: pointer;
84
+ transition: transform 0.2s, box-shadow 0.2s;
85
+ }
86
+
87
+ button:hover {
88
+ transform: translateY(-2px);
89
+ box-shadow: 0 10px 20px rgba(102, 126, 234, 0.4);
90
+ }
91
+
92
+ button:active {
93
+ transform: translateY(0);
94
+ }
95
+
96
+ button:disabled {
97
+ background: #ccc;
98
+ cursor: not-allowed;
99
+ transform: none;
100
+ }
101
+
102
+ .result {
103
+ margin-top: 30px;
104
+ padding: 25px;
105
+ border-radius: 10px;
106
+ display: none;
107
+ animation: slideIn 0.3s ease-out;
108
+ }
109
+
110
+ @keyframes slideIn {
111
+ from {
112
+ opacity: 0;
113
+ transform: translateY(-10px);
114
+ }
115
+ to {
116
+ opacity: 1;
117
+ transform: translateY(0);
118
+ }
119
+ }
120
+
121
+ .result.safe {
122
+ background: #d4edda;
123
+ border: 2px solid #28a745;
124
+ }
125
+
126
+ .result.unsafe {
127
+ background: #f8d7da;
128
+ border: 2px solid #dc3545;
129
+ }
130
+
131
+ .result-label {
132
+ font-size: 24px;
133
+ font-weight: 700;
134
+ margin-bottom: 10px;
135
+ }
136
+
137
+ .result.safe .result-label {
138
+ color: #28a745;
139
+ }
140
+
141
+ .result.unsafe .result-label {
142
+ color: #dc3545;
143
+ }
144
+
145
+ .result-confidence {
146
+ font-size: 16px;
147
+ color: #555;
148
+ }
149
+
150
+ .loading {
151
+ text-align: center;
152
+ color: #667eea;
153
+ font-weight: 600;
154
+ margin-top: 20px;
155
+ display: none;
156
+ }
157
+
158
+ .error {
159
+ background: #f8d7da;
160
+ border: 2px solid #dc3545;
161
+ color: #721c24;
162
+ padding: 15px;
163
+ border-radius: 10px;
164
+ margin-top: 20px;
165
+ display: none;
166
+ }
167
+ </style>
168
+ </head>
169
+ <body>
170
+ <div class="container">
171
+ <h1>🛡️ Prompt Safety Checker</h1>
172
+ <p class="subtitle">Using STU-Injection-aegis Model</p>
173
+
174
+ <div class="input-group">
175
+ <label for="prompt">Enter your prompt:</label>
176
+ <textarea id="prompt" placeholder="Type or paste your prompt here..."></textarea>
177
+ </div>
178
+
179
+ <button id="checkBtn" onclick="checkPrompt()">Check Safety</button>
180
+
181
+ <div class="loading" id="loading">Analyzing prompt...</div>
182
+ <div class="error" id="error"></div>
183
+ <div class="result" id="result">
184
+ <div class="result-label" id="resultLabel"></div>
185
+ <div class="result-confidence" id="resultConfidence"></div>
186
+ </div>
187
+ </div>
188
+
189
+ <script>
190
/**
 * Send the textarea contents to POST /classify and render the verdict.
 *
 * Shows a validation message for an empty prompt, a loading indicator while
 * the request is in flight, and either the result panel or an error panel
 * once it settles.
 */
async function checkPrompt() {
    const promptText = document.getElementById('prompt').value.trim();
    const resultDiv = document.getElementById('result');
    const loadingDiv = document.getElementById('loading');
    const errorDiv = document.getElementById('error');
    const checkBtn = document.getElementById('checkBtn');

    // The Ctrl/Cmd+Enter shortcut bypasses the disabled button, so ignore
    // calls made while a previous request is still running.
    if (checkBtn.disabled) {
        return;
    }

    if (!promptText) {
        errorDiv.textContent = 'Please enter a prompt to check.';
        errorDiv.style.display = 'block';
        resultDiv.style.display = 'none';
        return;
    }

    // Reset displays
    resultDiv.style.display = 'none';
    errorDiv.style.display = 'none';
    loadingDiv.style.display = 'block';
    checkBtn.disabled = true;

    try {
        const response = await fetch('/classify', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ prompt: promptText })
        });

        // Error responses may carry a JSON {error: "..."} body; tolerate
        // bodies that are not JSON at all (e.g. an HTML error page).
        const data = await response.json().catch(() => null);

        if (!response.ok) {
            throw new Error((data && data.error) || 'Failed to classify prompt');
        }
        if (!data || typeof data.label !== 'string') {
            throw new Error('Unexpected response from server');
        }

        // Display result; the lower-cased label selects the .safe/.unsafe styling
        resultDiv.className = 'result ' + data.label.toLowerCase();
        document.getElementById('resultLabel').textContent = data.label;
        document.getElementById('resultConfidence').textContent =
            `Confidence: ${data.confidence}%`;
        resultDiv.style.display = 'block';
    } catch (error) {
        errorDiv.textContent = 'Error: ' + error.message;
        errorDiv.style.display = 'block';
    } finally {
        // Runs on success and failure alike
        loadingDiv.style.display = 'none';
        checkBtn.disabled = false;
    }
}
242
+
243
+ // Allow Enter key to submit (with Ctrl/Cmd)
244
+ document.getElementById('prompt').addEventListener('keydown', function(e) {
245
+ if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) {
246
+ checkPrompt();
247
+ }
248
+ });
249
+ </script>
250
+ </body>
251
+ </html>